From 9fa45070a2e59a871e1cd3370173369f3a4f61e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Sep 2018 11:48:26 +0100 Subject: locking/atomics: Switch to generated fallbacks As a step to ensuring the atomic* APIs are consistent, switch to fallbacks generated by gen-atomic-fallback.sh. These are checked in rather than generated with Kbuild, since: * This allows inspection of the atomics with git grep and ctags on a pristine tree, which Linus strongly prefers being able to do. * The fallbacks are not affected by machine details or configuration options, so it is not necessary to regenerate them to take these into account. * These are included by files required *very* early in the build process (e.g. for generating bounds.h), and we'd rather not complicate the top-level Kbuild file with dependencies. The new fallback header should be equivalent to the old fallbacks in , but: * It is formatted a little differently due to scripting ensuring things are more regular than they used to be. * Fallbacks are now expanded in-place as static inline functions rather than macros. * The prototypes for fallbacks are arragned consistently with the return type on a separate line to try to keep to a sensible line length. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: catalin.marinas@arm.com Cc: linuxdrivers@attotech.com Cc: dvyukov@google.com Cc: Boqun Feng Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: glider@google.com Link: http://lkml.kernel.org/r/20180904104830.2975-3-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic-fallback.h | 2294 +++++++++++++++++++++++++++++++++++++++ include/linux/atomic.h | 1241 +-------------------- 2 files changed, 2295 insertions(+), 1240 deletions(-) create mode 100644 include/linux/atomic-fallback.h (limited to 'include/linux') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h new file mode 100644 index 000000000000..1c02c0112fbb --- /dev/null +++ b/include/linux/atomic-fallback.h @@ -0,0 +1,2294 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Generated by scripts/atomic/gen-atomic-fallback.sh +// DO NOT MODIFY THIS FILE DIRECTLY + +#ifndef _LINUX_ATOMIC_FALLBACK_H +#define _LINUX_ATOMIC_FALLBACK_H + +#ifndef xchg_relaxed +#define xchg_relaxed xchg +#define xchg_acquire xchg +#define xchg_release xchg +#else /* xchg_relaxed */ + +#ifndef xchg_acquire +#define xchg_acquire(...) \ + __atomic_op_acquire(xchg, __VA_ARGS__) +#endif + +#ifndef xchg_release +#define xchg_release(...) \ + __atomic_op_release(xchg, __VA_ARGS__) +#endif + +#ifndef xchg +#define xchg(...) \ + __atomic_op_fence(xchg, __VA_ARGS__) +#endif + +#endif /* xchg_relaxed */ + +#ifndef cmpxchg_relaxed +#define cmpxchg_relaxed cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#else /* cmpxchg_relaxed */ + +#ifndef cmpxchg_acquire +#define cmpxchg_acquire(...) \ + __atomic_op_acquire(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg_release +#define cmpxchg_release(...) \ + __atomic_op_release(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg +#define cmpxchg(...) \ + __atomic_op_fence(cmpxchg, __VA_ARGS__) +#endif + +#endif /* cmpxchg_relaxed */ + +#ifndef cmpxchg64_relaxed +#define cmpxchg64_relaxed cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#else /* cmpxchg64_relaxed */ + +#ifndef cmpxchg64_acquire +#define cmpxchg64_acquire(...) \ + __atomic_op_acquire(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64_release +#define cmpxchg64_release(...) \ + __atomic_op_release(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64 +#define cmpxchg64(...) \ + __atomic_op_fence(cmpxchg64, __VA_ARGS__) +#endif + +#endif /* cmpxchg64_relaxed */ + +#ifndef atomic_read_acquire +static inline int +atomic_read_acquire(const atomic_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic_read_acquire atomic_read_acquire +#endif + +#ifndef atomic_set_release +static inline void +atomic_set_release(atomic_t *v, int i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic_set_release atomic_set_release +#endif + +#ifndef atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return +#define atomic_add_return_release atomic_add_return +#define atomic_add_return_relaxed atomic_add_return +#else /* atomic_add_return_relaxed */ + +#ifndef atomic_add_return_acquire +static inline int +atomic_add_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_add_return_acquire atomic_add_return_acquire +#endif + +#ifndef atomic_add_return_release +static inline int +atomic_add_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_add_return_relaxed(i, v); +} +#define atomic_add_return_release atomic_add_return_release +#endif + +#ifndef atomic_add_return +static inline int +atomic_add_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_add_return atomic_add_return +#endif + +#endif /* atomic_add_return_relaxed */ + +#ifndef atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add +#define atomic_fetch_add_release atomic_fetch_add +#define atomic_fetch_add_relaxed atomic_fetch_add +#else /* atomic_fetch_add_relaxed */ + +#ifndef atomic_fetch_add_acquire +static inline int +atomic_fetch_add_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#endif + +#ifndef atomic_fetch_add_release +static inline int +atomic_fetch_add_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_add_relaxed(i, v); +} +#define atomic_fetch_add_release atomic_fetch_add_release +#endif + +#ifndef atomic_fetch_add +static inline int +atomic_fetch_add(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_add atomic_fetch_add +#endif + +#endif /* atomic_fetch_add_relaxed */ + +#ifndef atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return +#define atomic_sub_return_release atomic_sub_return +#define atomic_sub_return_relaxed atomic_sub_return +#else /* atomic_sub_return_relaxed */ + +#ifndef atomic_sub_return_acquire +static inline int +atomic_sub_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_sub_return_acquire atomic_sub_return_acquire +#endif + +#ifndef atomic_sub_return_release +static inline int +atomic_sub_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_sub_return_relaxed(i, v); +} +#define atomic_sub_return_release atomic_sub_return_release +#endif + +#ifndef atomic_sub_return +static inline int +atomic_sub_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_sub_return atomic_sub_return +#endif + +#endif /* atomic_sub_return_relaxed */ + +#ifndef atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub +#define atomic_fetch_sub_release atomic_fetch_sub +#define atomic_fetch_sub_relaxed atomic_fetch_sub +#else /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_fetch_sub_acquire +static inline int +atomic_fetch_sub_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#endif + +#ifndef atomic_fetch_sub_release +static inline int +atomic_fetch_sub_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_sub_relaxed(i, v); +} +#define atomic_fetch_sub_release atomic_fetch_sub_release +#endif + +#ifndef atomic_fetch_sub +static inline int +atomic_fetch_sub(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_sub atomic_fetch_sub +#endif + +#endif /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_inc +static inline void +atomic_inc(atomic_t *v) +{ + atomic_add(1, v); +} +#define atomic_inc atomic_inc +#endif + +#ifndef atomic_inc_return_relaxed +#ifdef atomic_inc_return +#define atomic_inc_return_acquire atomic_inc_return +#define atomic_inc_return_release atomic_inc_return +#define atomic_inc_return_relaxed atomic_inc_return +#endif /* atomic_inc_return */ + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + return atomic_add_return(1, v); +} +#define atomic_inc_return atomic_inc_return +#endif + +#ifndef atomic_inc_return_acquire +static inline int +atomic_inc_return_acquire(atomic_t *v) +{ + return atomic_add_return_acquire(1, v); +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + return atomic_add_return_release(1, v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return_relaxed +static inline int +atomic_inc_return_relaxed(atomic_t *v) +{ + return atomic_add_return_relaxed(1, v); +} +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#endif + +#else /* atomic_inc_return_relaxed */ + +#ifndef atomic_inc_return_acquire +static inline int +atomic_inc_return_acquire(atomic_t *v) +{ + int ret = atomic_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_inc_return_relaxed(v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_inc_return atomic_inc_return +#endif + +#endif /* atomic_inc_return_relaxed */ + +#ifndef atomic_fetch_inc_relaxed +#ifdef atomic_fetch_inc +#define atomic_fetch_inc_acquire atomic_fetch_inc +#define atomic_fetch_inc_release atomic_fetch_inc +#define atomic_fetch_inc_relaxed atomic_fetch_inc +#endif /* atomic_fetch_inc */ + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + return atomic_fetch_add(1, v); +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + return atomic_fetch_add_acquire(1, v); +} +#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + return atomic_fetch_add_release(1, v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc_relaxed +static inline int +atomic_fetch_inc_relaxed(atomic_t *v) +{ + return atomic_fetch_add_relaxed(1, v); +} +#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed +#endif + +#else /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + int ret = atomic_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_inc_relaxed(v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#endif /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_dec +static inline void +atomic_dec(atomic_t *v) +{ + atomic_sub(1, v); +} +#define atomic_dec atomic_dec +#endif + +#ifndef atomic_dec_return_relaxed +#ifdef atomic_dec_return +#define atomic_dec_return_acquire atomic_dec_return +#define atomic_dec_return_release atomic_dec_return +#define atomic_dec_return_relaxed atomic_dec_return +#endif /* atomic_dec_return */ + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + return atomic_sub_return(1, v); +} +#define atomic_dec_return atomic_dec_return +#endif + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + return atomic_sub_return_acquire(1, v); +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + return atomic_sub_return_release(1, v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return_relaxed +static inline int +atomic_dec_return_relaxed(atomic_t *v) +{ + return atomic_sub_return_relaxed(1, v); +} +#define atomic_dec_return_relaxed atomic_dec_return_relaxed +#endif + +#else /* atomic_dec_return_relaxed */ + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + int ret = atomic_dec_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_dec_return_relaxed(v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_dec_return atomic_dec_return +#endif + +#endif /* atomic_dec_return_relaxed */ + +#ifndef atomic_fetch_dec_relaxed +#ifdef atomic_fetch_dec +#define atomic_fetch_dec_acquire atomic_fetch_dec +#define atomic_fetch_dec_release atomic_fetch_dec +#define atomic_fetch_dec_relaxed atomic_fetch_dec +#endif /* atomic_fetch_dec */ + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + return atomic_fetch_sub(1, v); +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + return atomic_fetch_sub_acquire(1, v); +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + return atomic_fetch_sub_release(1, v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec_relaxed +static inline int +atomic_fetch_dec_relaxed(atomic_t *v) +{ + return atomic_fetch_sub_relaxed(1, v); +} +#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed +#endif + +#else /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + int ret = atomic_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_dec_relaxed(v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#endif /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and +#define atomic_fetch_and_release atomic_fetch_and +#define atomic_fetch_and_relaxed atomic_fetch_and +#else /* atomic_fetch_and_relaxed */ + +#ifndef atomic_fetch_and_acquire +static inline int +atomic_fetch_and_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#endif + +#ifndef atomic_fetch_and_release +static inline int +atomic_fetch_and_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_and_relaxed(i, v); +} +#define atomic_fetch_and_release atomic_fetch_and_release +#endif + +#ifndef atomic_fetch_and +static inline int +atomic_fetch_and(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_and atomic_fetch_and +#endif + +#endif /* atomic_fetch_and_relaxed */ + +#ifndef atomic_andnot +static inline void +atomic_andnot(int i, atomic_t *v) +{ + atomic_and(~i, v); +} +#define atomic_andnot atomic_andnot +#endif + +#ifndef atomic_fetch_andnot_relaxed +#ifdef atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#endif /* atomic_fetch_andnot */ + +#ifndef atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + return atomic_fetch_and(~i, v); +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + return atomic_fetch_and_acquire(~i, v); +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + return atomic_fetch_and_release(~i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot_relaxed +static inline int +atomic_fetch_andnot_relaxed(int i, atomic_t *v) +{ + return atomic_fetch_and_relaxed(~i, v); +} +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#endif + +#else /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_andnot_relaxed(i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#endif /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or +#define atomic_fetch_or_release atomic_fetch_or +#define atomic_fetch_or_relaxed atomic_fetch_or +#else /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_or_acquire +static inline int +atomic_fetch_or_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#endif + +#ifndef atomic_fetch_or_release +static inline int +atomic_fetch_or_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_or_relaxed(i, v); +} +#define atomic_fetch_or_release atomic_fetch_or_release +#endif + +#ifndef atomic_fetch_or +static inline int +atomic_fetch_or(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_or atomic_fetch_or +#endif + +#endif /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor +#define atomic_fetch_xor_release atomic_fetch_xor +#define atomic_fetch_xor_relaxed atomic_fetch_xor +#else /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_fetch_xor_acquire +static inline int +atomic_fetch_xor_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#endif + +#ifndef atomic_fetch_xor_release +static inline int +atomic_fetch_xor_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_xor_relaxed(i, v); +} +#define atomic_fetch_xor_release atomic_fetch_xor_release +#endif + +#ifndef atomic_fetch_xor +static inline int +atomic_fetch_xor(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_xor atomic_fetch_xor +#endif + +#endif /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_xchg_relaxed +#define atomic_xchg_acquire atomic_xchg +#define atomic_xchg_release atomic_xchg +#define atomic_xchg_relaxed atomic_xchg +#else /* atomic_xchg_relaxed */ + +#ifndef atomic_xchg_acquire +static inline int +atomic_xchg_acquire(atomic_t *v, int i) +{ + int ret = atomic_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic_xchg_acquire atomic_xchg_acquire +#endif + +#ifndef atomic_xchg_release +static inline int +atomic_xchg_release(atomic_t *v, int i) +{ + __atomic_release_fence(); + return atomic_xchg_relaxed(v, i); +} +#define atomic_xchg_release atomic_xchg_release +#endif + +#ifndef atomic_xchg +static inline int +atomic_xchg(atomic_t *v, int i) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic_xchg atomic_xchg +#endif + +#endif /* atomic_xchg_relaxed */ + +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_acquire atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#else /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_cmpxchg_acquire +static inline int +atomic_cmpxchg_acquire(atomic_t *v, int old, int new) +{ + int ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#endif + +#ifndef atomic_cmpxchg_release +static inline int +atomic_cmpxchg_release(atomic_t *v, int old, int new) +{ + __atomic_release_fence(); + return atomic_cmpxchg_relaxed(v, old, new); +} +#define atomic_cmpxchg_release atomic_cmpxchg_release +#endif + +#ifndef atomic_cmpxchg +static inline int +atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_cmpxchg atomic_cmpxchg +#endif + +#endif /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_relaxed +#ifdef atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg_relaxed +static inline bool +atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed +#endif + +#else /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + bool ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + __atomic_release_fence(); + return atomic_try_cmpxchg_relaxed(v, old, new); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#endif /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_sub_and_test +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_sub_and_test(int i, atomic_t *v) +{ + return atomic_sub_return(i, v) == 0; +} +#define atomic_sub_and_test atomic_sub_and_test +#endif + +#ifndef atomic_dec_and_test +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic_dec_and_test(atomic_t *v) +{ + return atomic_dec_return(v) == 0; +} +#define atomic_dec_and_test atomic_dec_and_test +#endif + +#ifndef atomic_inc_and_test +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_inc_and_test(atomic_t *v) +{ + return atomic_inc_return(v) == 0; +} +#define atomic_inc_and_test atomic_inc_and_test +#endif + +#ifndef atomic_add_negative +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool +atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} +#define atomic_add_negative atomic_add_negative +#endif + +#ifndef atomic_fetch_add_unless +/** + * atomic_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline int +atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c = atomic_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless +#endif + +#ifndef atomic_add_unless +/** + * atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic_add_unless(atomic_t *v, int a, int u) +{ + return atomic_fetch_add_unless(v, a, u) != u; +} +#define atomic_add_unless atomic_add_unless +#endif + +#ifndef atomic_inc_not_zero +/** + * atomic_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +static inline bool +atomic_inc_not_zero(atomic_t *v) +{ + return atomic_add_unless(v, 1, 0); +} +#define atomic_inc_not_zero atomic_inc_not_zero +#endif + +#ifndef atomic_inc_unless_negative +static inline bool +atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic_inc_unless_negative atomic_inc_unless_negative +#endif + +#ifndef atomic_dec_unless_positive +static inline bool +atomic_dec_unless_positive(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic_dec_unless_positive atomic_dec_unless_positive +#endif + +#ifndef atomic_dec_if_positive +static inline int +atomic_dec_if_positive(atomic_t *v) +{ + int dec, c = atomic_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic_dec_if_positive atomic_dec_if_positive +#endif + +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#endif + +#ifndef atomic64_read_acquire +static inline s64 +atomic64_read_acquire(const atomic64_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic64_read_acquire atomic64_read_acquire +#endif + +#ifndef atomic64_set_release +static inline void +atomic64_set_release(atomic64_t *v, s64 i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic64_set_release atomic64_set_release +#endif + +#ifndef atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return +#define atomic64_add_return_release atomic64_add_return +#define atomic64_add_return_relaxed atomic64_add_return +#else /* atomic64_add_return_relaxed */ + +#ifndef atomic64_add_return_acquire +static inline s64 +atomic64_add_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_add_return_acquire atomic64_add_return_acquire +#endif + +#ifndef atomic64_add_return_release +static inline s64 +atomic64_add_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_add_return_relaxed(i, v); +} +#define atomic64_add_return_release atomic64_add_return_release +#endif + +#ifndef atomic64_add_return +static inline s64 +atomic64_add_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_add_return atomic64_add_return +#endif + +#endif /* atomic64_add_return_relaxed */ + +#ifndef atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add +#define atomic64_fetch_add_release atomic64_fetch_add +#define atomic64_fetch_add_relaxed atomic64_fetch_add +#else /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_fetch_add_acquire +static inline s64 +atomic64_fetch_add_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#endif + +#ifndef atomic64_fetch_add_release +static inline s64 +atomic64_fetch_add_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_add_relaxed(i, v); +} +#define atomic64_fetch_add_release atomic64_fetch_add_release +#endif + +#ifndef atomic64_fetch_add +static inline s64 +atomic64_fetch_add(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_add atomic64_fetch_add +#endif + +#endif /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_sub_return_relaxed +#define atomic64_sub_return_acquire atomic64_sub_return +#define atomic64_sub_return_release atomic64_sub_return +#define atomic64_sub_return_relaxed atomic64_sub_return +#else /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_sub_return_acquire +static inline s64 +atomic64_sub_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#endif + +#ifndef atomic64_sub_return_release +static inline s64 +atomic64_sub_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_sub_return_relaxed(i, v); +} +#define atomic64_sub_return_release atomic64_sub_return_release +#endif + +#ifndef atomic64_sub_return +static inline s64 +atomic64_sub_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_sub_return atomic64_sub_return +#endif + +#endif /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub +#define atomic64_fetch_sub_release atomic64_fetch_sub +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub +#else /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_fetch_sub_acquire +static inline s64 +atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#endif + +#ifndef atomic64_fetch_sub_release +static inline s64 +atomic64_fetch_sub_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_sub_relaxed(i, v); +} +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#endif + +#ifndef atomic64_fetch_sub +static inline s64 +atomic64_fetch_sub(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_sub atomic64_fetch_sub +#endif + +#endif /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_inc +static inline void +atomic64_inc(atomic64_t *v) +{ + atomic64_add(1, v); +} +#define atomic64_inc atomic64_inc +#endif + +#ifndef atomic64_inc_return_relaxed +#ifdef atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return +#define atomic64_inc_return_relaxed atomic64_inc_return +#endif /* atomic64_inc_return */ + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + return atomic64_add_return(1, v); +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + return atomic64_add_return_acquire(1, v); +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + return atomic64_add_return_release(1, v); +} +#define atomic64_inc_return_release atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return_relaxed +static inline s64 +atomic64_inc_return_relaxed(atomic64_t *v) +{ + return atomic64_add_return_relaxed(1, v); +} +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#endif + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_inc_return_relaxed(v); +} +#define atomic64_inc_return_release atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#endif /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_fetch_inc_relaxed +#ifdef atomic64_fetch_inc +#define atomic64_fetch_inc_acquire atomic64_fetch_inc +#define atomic64_fetch_inc_release atomic64_fetch_inc +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc +#endif /* atomic64_fetch_inc */ + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + return atomic64_fetch_add(1, v); +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + return atomic64_fetch_add_acquire(1, v); +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + return atomic64_fetch_add_release(1, v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release +#endif + +#ifndef atomic64_fetch_inc_relaxed +static inline s64 +atomic64_fetch_inc_relaxed(atomic64_t *v) +{ + return atomic64_fetch_add_relaxed(1, v); +} +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed +#endif + +#else /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_inc_relaxed(v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release +#endif + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#endif /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_dec +static inline void +atomic64_dec(atomic64_t *v) +{ + atomic64_sub(1, v); +} +#define atomic64_dec atomic64_dec +#endif + +#ifndef atomic64_dec_return_relaxed +#ifdef atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return +#define atomic64_dec_return_relaxed atomic64_dec_return +#endif /* atomic64_dec_return */ + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + return atomic64_sub_return(1, v); +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + return atomic64_sub_return_acquire(1, v); +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 +atomic64_dec_return_release(atomic64_t *v) +{ + return atomic64_sub_return_release(1, v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return_relaxed +static inline s64 +atomic64_dec_return_relaxed(atomic64_t *v) +{ + return atomic64_sub_return_relaxed(1, v); +} +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed +#endif + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_dec_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 +atomic64_dec_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_dec_return_relaxed(v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#endif /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_fetch_dec_relaxed +#ifdef atomic64_fetch_dec +#define atomic64_fetch_dec_acquire atomic64_fetch_dec +#define atomic64_fetch_dec_release atomic64_fetch_dec +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec +#endif /* atomic64_fetch_dec */ + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + return atomic64_fetch_sub(1, v); +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + return atomic64_fetch_sub_acquire(1, v); +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 +atomic64_fetch_dec_release(atomic64_t *v) +{ + return atomic64_fetch_sub_release(1, v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec_relaxed +static inline s64 +atomic64_fetch_dec_relaxed(atomic64_t *v) +{ + return atomic64_fetch_sub_relaxed(1, v); +} +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed +#endif + +#else /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 +atomic64_fetch_dec_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_dec_relaxed(v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#endif /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and +#define atomic64_fetch_and_release atomic64_fetch_and +#define atomic64_fetch_and_relaxed atomic64_fetch_and +#else /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_fetch_and_acquire +static inline s64 +atomic64_fetch_and_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#endif + +#ifndef atomic64_fetch_and_release +static inline s64 +atomic64_fetch_and_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_and_relaxed(i, v); +} +#define atomic64_fetch_and_release atomic64_fetch_and_release +#endif + +#ifndef atomic64_fetch_and +static inline s64 +atomic64_fetch_and(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_and atomic64_fetch_and +#endif + +#endif /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_andnot +static inline void +atomic64_andnot(s64 i, atomic64_t *v) +{ + atomic64_and(~i, v); +} +#define atomic64_andnot atomic64_andnot +#endif + +#ifndef atomic64_fetch_andnot_relaxed +#ifdef atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot +#endif /* atomic64_fetch_andnot */ + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and(~i, v); +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_acquire(~i, v); +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_release(~i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot_relaxed +static inline s64 +atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_relaxed(~i, v); +} +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#endif + +#else /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_andnot_relaxed(i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#endif /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or +#define atomic64_fetch_or_release atomic64_fetch_or +#define atomic64_fetch_or_relaxed atomic64_fetch_or +#else /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_or_acquire +static inline s64 +atomic64_fetch_or_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#endif + +#ifndef atomic64_fetch_or_release +static inline s64 +atomic64_fetch_or_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_or_relaxed(i, v); +} +#define atomic64_fetch_or_release atomic64_fetch_or_release +#endif + +#ifndef atomic64_fetch_or +static inline s64 +atomic64_fetch_or(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_or atomic64_fetch_or +#endif + +#endif /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor +#define atomic64_fetch_xor_release atomic64_fetch_xor +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor +#else /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_fetch_xor_acquire +static inline s64 +atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#endif + +#ifndef atomic64_fetch_xor_release +static inline s64 +atomic64_fetch_xor_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_xor_relaxed(i, v); +} +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#endif + +#ifndef atomic64_fetch_xor +static inline s64 +atomic64_fetch_xor(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_xor atomic64_fetch_xor +#endif + +#endif /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_xchg_relaxed +#define atomic64_xchg_acquire atomic64_xchg +#define atomic64_xchg_release atomic64_xchg +#define atomic64_xchg_relaxed atomic64_xchg +#else /* atomic64_xchg_relaxed */ + +#ifndef atomic64_xchg_acquire +static inline s64 +atomic64_xchg_acquire(atomic64_t *v, s64 i) +{ + s64 ret = atomic64_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_xchg_acquire atomic64_xchg_acquire +#endif + +#ifndef atomic64_xchg_release +static inline s64 +atomic64_xchg_release(atomic64_t *v, s64 i) +{ + __atomic_release_fence(); + return atomic64_xchg_relaxed(v, i); +} +#define atomic64_xchg_release atomic64_xchg_release +#endif + +#ifndef atomic64_xchg +static inline s64 +atomic64_xchg(atomic64_t *v, s64 i) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_xchg atomic64_xchg +#endif + +#endif /* atomic64_xchg_relaxed */ + +#ifndef atomic64_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic64_cmpxchg +#define atomic64_cmpxchg_release atomic64_cmpxchg +#define atomic64_cmpxchg_relaxed atomic64_cmpxchg +#else /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_cmpxchg_acquire +static inline s64 +atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) +{ + s64 ret = atomic64_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#endif + +#ifndef atomic64_cmpxchg_release +static inline s64 +atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) +{ + __atomic_release_fence(); + return atomic64_cmpxchg_relaxed(v, old, new); +} +#define atomic64_cmpxchg_release atomic64_cmpxchg_release +#endif + +#ifndef atomic64_cmpxchg +static inline s64 +atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_cmpxchg atomic64_cmpxchg +#endif + +#endif /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_relaxed +#ifdef atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg_relaxed +static inline bool +atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed +#endif + +#else /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + bool ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + __atomic_release_fence(); + return atomic64_try_cmpxchg_relaxed(v, old, new); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#endif /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_sub_and_test +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} +#define atomic64_sub_and_test atomic64_sub_and_test +#endif + +#ifndef atomic64_dec_and_test +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer of type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} +#define atomic64_dec_and_test atomic64_dec_and_test +#endif + +#ifndef atomic64_inc_and_test +/** + * atomic64_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} +#define atomic64_inc_and_test atomic64_inc_and_test +#endif + +#ifndef atomic64_add_negative +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool +atomic64_add_negative(s64 i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} +#define atomic64_add_negative atomic64_add_negative +#endif + +#ifndef atomic64_fetch_add_unless +/** + * atomic64_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline s64 +atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic64_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless +#endif + +#ifndef atomic64_add_unless +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +{ + return atomic64_fetch_add_unless(v, a, u) != u; +} +#define atomic64_add_unless atomic64_add_unless +#endif + +#ifndef atomic64_inc_not_zero +/** + * atomic64_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +static inline bool +atomic64_inc_not_zero(atomic64_t *v) +{ + return atomic64_add_unless(v, 1, 0); +} +#define atomic64_inc_not_zero atomic64_inc_not_zero +#endif + +#ifndef atomic64_inc_unless_negative +static inline bool +atomic64_inc_unless_negative(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic64_inc_unless_negative atomic64_inc_unless_negative +#endif + +#ifndef atomic64_dec_unless_positive +static inline bool +atomic64_dec_unless_positive(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic64_dec_unless_positive atomic64_dec_unless_positive +#endif + +#ifndef atomic64_dec_if_positive +static inline s64 +atomic64_dec_if_positive(atomic64_t *v) +{ + s64 dec, c = atomic64_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic64_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic64_dec_if_positive atomic64_dec_if_positive +#endif + +#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#endif /* _LINUX_ATOMIC_FALLBACK_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 1e8e88bdaf09..4c0d009a46f0 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -25,14 +25,6 @@ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions. */ -#ifndef atomic_read_acquire -#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic_set_release -#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - /* * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. In the case where the relaxed @@ -79,1238 +71,7 @@ __ret; \ }) -/* atomic_add_return_relaxed */ -#ifndef atomic_add_return_relaxed -#define atomic_add_return_relaxed atomic_add_return -#define atomic_add_return_acquire atomic_add_return -#define atomic_add_return_release atomic_add_return - -#else /* atomic_add_return_relaxed */ - -#ifndef atomic_add_return_acquire -#define atomic_add_return_acquire(...) \ - __atomic_op_acquire(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return_release -#define atomic_add_return_release(...) \ - __atomic_op_release(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return -#define atomic_add_return(...) \ - __atomic_op_fence(atomic_add_return, __VA_ARGS__) -#endif -#endif /* atomic_add_return_relaxed */ - -#ifndef atomic_inc -#define atomic_inc(v) atomic_add(1, (v)) -#endif - -/* atomic_inc_return_relaxed */ -#ifndef atomic_inc_return_relaxed - -#ifndef atomic_inc_return -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) -#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) -#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) -#else /* atomic_inc_return */ -#define atomic_inc_return_relaxed atomic_inc_return -#define atomic_inc_return_acquire atomic_inc_return -#define atomic_inc_return_release atomic_inc_return -#endif /* atomic_inc_return */ - -#else /* atomic_inc_return_relaxed */ - -#ifndef atomic_inc_return_acquire -#define atomic_inc_return_acquire(...) \ - __atomic_op_acquire(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return_release -#define atomic_inc_return_release(...) \ - __atomic_op_release(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return -#define atomic_inc_return(...) \ - __atomic_op_fence(atomic_inc_return, __VA_ARGS__) -#endif -#endif /* atomic_inc_return_relaxed */ - -/* atomic_sub_return_relaxed */ -#ifndef atomic_sub_return_relaxed -#define atomic_sub_return_relaxed atomic_sub_return -#define atomic_sub_return_acquire atomic_sub_return -#define atomic_sub_return_release atomic_sub_return - -#else /* atomic_sub_return_relaxed */ - -#ifndef atomic_sub_return_acquire -#define atomic_sub_return_acquire(...) \ - __atomic_op_acquire(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return_release -#define atomic_sub_return_release(...) \ - __atomic_op_release(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return -#define atomic_sub_return(...) \ - __atomic_op_fence(atomic_sub_return, __VA_ARGS__) -#endif -#endif /* atomic_sub_return_relaxed */ - -#ifndef atomic_dec -#define atomic_dec(v) atomic_sub(1, (v)) -#endif - -/* atomic_dec_return_relaxed */ -#ifndef atomic_dec_return_relaxed - -#ifndef atomic_dec_return -#define atomic_dec_return(v) atomic_sub_return(1, (v)) -#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) -#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) -#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) -#else /* atomic_dec_return */ -#define atomic_dec_return_relaxed atomic_dec_return -#define atomic_dec_return_acquire atomic_dec_return -#define atomic_dec_return_release atomic_dec_return -#endif /* atomic_dec_return */ - -#else /* atomic_dec_return_relaxed */ - -#ifndef atomic_dec_return_acquire -#define atomic_dec_return_acquire(...) \ - __atomic_op_acquire(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return_release -#define atomic_dec_return_release(...) \ - __atomic_op_release(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return -#define atomic_dec_return(...) \ - __atomic_op_fence(atomic_dec_return, __VA_ARGS__) -#endif -#endif /* atomic_dec_return_relaxed */ - - -/* atomic_fetch_add_relaxed */ -#ifndef atomic_fetch_add_relaxed -#define atomic_fetch_add_relaxed atomic_fetch_add -#define atomic_fetch_add_acquire atomic_fetch_add -#define atomic_fetch_add_release atomic_fetch_add - -#else /* atomic_fetch_add_relaxed */ - -#ifndef atomic_fetch_add_acquire -#define atomic_fetch_add_acquire(...) \ - __atomic_op_acquire(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add_release -#define atomic_fetch_add_release(...) \ - __atomic_op_release(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add -#define atomic_fetch_add(...) \ - __atomic_op_fence(atomic_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic_fetch_add_relaxed */ - -/* atomic_fetch_inc_relaxed */ -#ifndef atomic_fetch_inc_relaxed - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(v) atomic_fetch_add(1, (v)) -#define atomic_fetch_inc_relaxed(v) atomic_fetch_add_relaxed(1, (v)) -#define atomic_fetch_inc_acquire(v) atomic_fetch_add_acquire(1, (v)) -#define atomic_fetch_inc_release(v) atomic_fetch_add_release(1, (v)) -#else /* atomic_fetch_inc */ -#define atomic_fetch_inc_relaxed atomic_fetch_inc -#define atomic_fetch_inc_acquire atomic_fetch_inc -#define atomic_fetch_inc_release atomic_fetch_inc -#endif /* atomic_fetch_inc */ - -#else /* atomic_fetch_inc_relaxed */ - -#ifndef atomic_fetch_inc_acquire -#define atomic_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc_release -#define atomic_fetch_inc_release(...) \ - __atomic_op_release(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(...) \ - __atomic_op_fence(atomic_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic_fetch_inc_relaxed */ - -/* atomic_fetch_sub_relaxed */ -#ifndef atomic_fetch_sub_relaxed -#define atomic_fetch_sub_relaxed atomic_fetch_sub -#define atomic_fetch_sub_acquire atomic_fetch_sub -#define atomic_fetch_sub_release atomic_fetch_sub - -#else /* atomic_fetch_sub_relaxed */ - -#ifndef atomic_fetch_sub_acquire -#define atomic_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub_release -#define atomic_fetch_sub_release(...) \ - __atomic_op_release(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub -#define atomic_fetch_sub(...) \ - __atomic_op_fence(atomic_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic_fetch_sub_relaxed */ - -/* atomic_fetch_dec_relaxed */ -#ifndef atomic_fetch_dec_relaxed - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(v) atomic_fetch_sub(1, (v)) -#define atomic_fetch_dec_relaxed(v) atomic_fetch_sub_relaxed(1, (v)) -#define atomic_fetch_dec_acquire(v) atomic_fetch_sub_acquire(1, (v)) -#define atomic_fetch_dec_release(v) atomic_fetch_sub_release(1, (v)) -#else /* atomic_fetch_dec */ -#define atomic_fetch_dec_relaxed atomic_fetch_dec -#define atomic_fetch_dec_acquire atomic_fetch_dec -#define atomic_fetch_dec_release atomic_fetch_dec -#endif /* atomic_fetch_dec */ - -#else /* atomic_fetch_dec_relaxed */ - -#ifndef atomic_fetch_dec_acquire -#define atomic_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec_release -#define atomic_fetch_dec_release(...) \ - __atomic_op_release(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(...) \ - __atomic_op_fence(atomic_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic_fetch_dec_relaxed */ - -/* atomic_fetch_or_relaxed */ -#ifndef atomic_fetch_or_relaxed -#define atomic_fetch_or_relaxed atomic_fetch_or -#define atomic_fetch_or_acquire atomic_fetch_or -#define atomic_fetch_or_release atomic_fetch_or - -#else /* atomic_fetch_or_relaxed */ - -#ifndef atomic_fetch_or_acquire -#define atomic_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or_release -#define atomic_fetch_or_release(...) \ - __atomic_op_release(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or -#define atomic_fetch_or(...) \ - __atomic_op_fence(atomic_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic_fetch_or_relaxed */ - -/* atomic_fetch_and_relaxed */ -#ifndef atomic_fetch_and_relaxed -#define atomic_fetch_and_relaxed atomic_fetch_and -#define atomic_fetch_and_acquire atomic_fetch_and -#define atomic_fetch_and_release atomic_fetch_and - -#else /* atomic_fetch_and_relaxed */ - -#ifndef atomic_fetch_and_acquire -#define atomic_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and_release -#define atomic_fetch_and_release(...) \ - __atomic_op_release(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and -#define atomic_fetch_and(...) \ - __atomic_op_fence(atomic_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic_fetch_and_relaxed */ - -#ifndef atomic_andnot -#define atomic_andnot(i, v) atomic_and(~(int)(i), (v)) -#endif - -#ifndef atomic_fetch_andnot_relaxed - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v)) -#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v)) -#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v)) -#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v)) -#else /* atomic_fetch_andnot */ -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot -#define atomic_fetch_andnot_acquire atomic_fetch_andnot -#define atomic_fetch_andnot_release atomic_fetch_andnot -#endif /* atomic_fetch_andnot */ - -#else /* atomic_fetch_andnot_relaxed */ - -#ifndef atomic_fetch_andnot_acquire -#define atomic_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot_release -#define atomic_fetch_andnot_release(...) \ - __atomic_op_release(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(...) \ - __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic_fetch_andnot_relaxed */ - -/* atomic_fetch_xor_relaxed */ -#ifndef atomic_fetch_xor_relaxed -#define atomic_fetch_xor_relaxed atomic_fetch_xor -#define atomic_fetch_xor_acquire atomic_fetch_xor -#define atomic_fetch_xor_release atomic_fetch_xor - -#else /* atomic_fetch_xor_relaxed */ - -#ifndef atomic_fetch_xor_acquire -#define atomic_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor_release -#define atomic_fetch_xor_release(...) \ - __atomic_op_release(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor -#define atomic_fetch_xor(...) \ - __atomic_op_fence(atomic_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic_fetch_xor_relaxed */ - - -/* atomic_xchg_relaxed */ -#ifndef atomic_xchg_relaxed -#define atomic_xchg_relaxed atomic_xchg -#define atomic_xchg_acquire atomic_xchg -#define atomic_xchg_release atomic_xchg - -#else /* atomic_xchg_relaxed */ - -#ifndef atomic_xchg_acquire -#define atomic_xchg_acquire(...) \ - __atomic_op_acquire(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg_release -#define atomic_xchg_release(...) \ - __atomic_op_release(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg -#define atomic_xchg(...) \ - __atomic_op_fence(atomic_xchg, __VA_ARGS__) -#endif -#endif /* atomic_xchg_relaxed */ - -/* atomic_cmpxchg_relaxed */ -#ifndef atomic_cmpxchg_relaxed -#define atomic_cmpxchg_relaxed atomic_cmpxchg -#define atomic_cmpxchg_acquire atomic_cmpxchg -#define atomic_cmpxchg_release atomic_cmpxchg - -#else /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_cmpxchg_acquire -#define atomic_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg_release -#define atomic_cmpxchg_release(...) \ - __atomic_op_release(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg -#define atomic_cmpxchg(...) \ - __atomic_op_fence(atomic_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_try_cmpxchg - -#define __atomic_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) -#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic_try_cmpxchg */ -#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg -#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg -#define atomic_try_cmpxchg_release atomic_try_cmpxchg -#endif /* atomic_try_cmpxchg */ - -/* cmpxchg_relaxed */ -#ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg - -#else /* cmpxchg_relaxed */ - -#ifndef cmpxchg_acquire -#define cmpxchg_acquire(...) \ - __atomic_op_acquire(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg_release -#define cmpxchg_release(...) \ - __atomic_op_release(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg -#define cmpxchg(...) \ - __atomic_op_fence(cmpxchg, __VA_ARGS__) -#endif -#endif /* cmpxchg_relaxed */ - -/* cmpxchg64_relaxed */ -#ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 - -#else /* cmpxchg64_relaxed */ - -#ifndef cmpxchg64_acquire -#define cmpxchg64_acquire(...) \ - __atomic_op_acquire(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64_release -#define cmpxchg64_release(...) \ - __atomic_op_release(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64 -#define cmpxchg64(...) \ - __atomic_op_fence(cmpxchg64, __VA_ARGS__) -#endif -#endif /* cmpxchg64_relaxed */ - -/* xchg_relaxed */ -#ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg - -#else /* xchg_relaxed */ - -#ifndef xchg_acquire -#define xchg_acquire(...) __atomic_op_acquire(xchg, __VA_ARGS__) -#endif - -#ifndef xchg_release -#define xchg_release(...) __atomic_op_release(xchg, __VA_ARGS__) -#endif - -#ifndef xchg -#define xchg(...) __atomic_op_fence(xchg, __VA_ARGS__) -#endif -#endif /* xchg_relaxed */ - -/** - * atomic_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic_fetch_add_unless -static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) -{ - int c = atomic_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic_add_unless(atomic_t *v, int a, int u) -{ - return atomic_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic_inc_not_zero -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#endif - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic_inc_and_test -static inline bool atomic_inc_and_test(atomic_t *v) -{ - return atomic_inc_return(v) == 0; -} -#endif - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic_dec_and_test -static inline bool atomic_dec_and_test(atomic_t *v) -{ - return atomic_dec_return(v) == 0; -} -#endif - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic_sub_and_test -static inline bool atomic_sub_and_test(int i, atomic_t *v) -{ - return atomic_sub_return(i, v) == 0; -} -#endif - -/** - * atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -#ifndef atomic_add_negative -static inline bool atomic_add_negative(int i, atomic_t *v) -{ - return atomic_add_return(i, v) < 0; -} -#endif - -#ifndef atomic_inc_unless_negative -static inline bool atomic_inc_unless_negative(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic_dec_unless_positive -static inline bool atomic_dec_unless_positive(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -#ifndef atomic_dec_if_positive -static inline int atomic_dec_if_positive(atomic_t *v) -{ - int dec, c = atomic_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) - -#ifdef CONFIG_GENERIC_ATOMIC64 -#include -#endif - -#ifndef atomic64_read_acquire -#define atomic64_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic64_set_release -#define atomic64_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - -/* atomic64_add_return_relaxed */ -#ifndef atomic64_add_return_relaxed -#define atomic64_add_return_relaxed atomic64_add_return -#define atomic64_add_return_acquire atomic64_add_return -#define atomic64_add_return_release atomic64_add_return - -#else /* atomic64_add_return_relaxed */ - -#ifndef atomic64_add_return_acquire -#define atomic64_add_return_acquire(...) \ - __atomic_op_acquire(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return_release -#define atomic64_add_return_release(...) \ - __atomic_op_release(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return -#define atomic64_add_return(...) \ - __atomic_op_fence(atomic64_add_return, __VA_ARGS__) -#endif -#endif /* atomic64_add_return_relaxed */ - -#ifndef atomic64_inc -#define atomic64_inc(v) atomic64_add(1, (v)) -#endif - -/* atomic64_inc_return_relaxed */ -#ifndef atomic64_inc_return_relaxed - -#ifndef atomic64_inc_return -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) -#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) -#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) -#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) -#else /* atomic64_inc_return */ -#define atomic64_inc_return_relaxed atomic64_inc_return -#define atomic64_inc_return_acquire atomic64_inc_return -#define atomic64_inc_return_release atomic64_inc_return -#endif /* atomic64_inc_return */ - -#else /* atomic64_inc_return_relaxed */ - -#ifndef atomic64_inc_return_acquire -#define atomic64_inc_return_acquire(...) \ - __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return_release -#define atomic64_inc_return_release(...) \ - __atomic_op_release(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return -#define atomic64_inc_return(...) \ - __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) -#endif -#endif /* atomic64_inc_return_relaxed */ - - -/* atomic64_sub_return_relaxed */ -#ifndef atomic64_sub_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return -#define atomic64_sub_return_acquire atomic64_sub_return -#define atomic64_sub_return_release atomic64_sub_return - -#else /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_sub_return_acquire -#define atomic64_sub_return_acquire(...) \ - __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return_release -#define atomic64_sub_return_release(...) \ - __atomic_op_release(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return -#define atomic64_sub_return(...) \ - __atomic_op_fence(atomic64_sub_return, __VA_ARGS__) -#endif -#endif /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_dec -#define atomic64_dec(v) atomic64_sub(1, (v)) -#endif - -/* atomic64_dec_return_relaxed */ -#ifndef atomic64_dec_return_relaxed - -#ifndef atomic64_dec_return -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) -#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) -#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) -#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) -#else /* atomic64_dec_return */ -#define atomic64_dec_return_relaxed atomic64_dec_return -#define atomic64_dec_return_acquire atomic64_dec_return -#define atomic64_dec_return_release atomic64_dec_return -#endif /* atomic64_dec_return */ - -#else /* atomic64_dec_return_relaxed */ - -#ifndef atomic64_dec_return_acquire -#define atomic64_dec_return_acquire(...) \ - __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return_release -#define atomic64_dec_return_release(...) \ - __atomic_op_release(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return -#define atomic64_dec_return(...) \ - __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) -#endif -#endif /* atomic64_dec_return_relaxed */ - - -/* atomic64_fetch_add_relaxed */ -#ifndef atomic64_fetch_add_relaxed -#define atomic64_fetch_add_relaxed atomic64_fetch_add -#define atomic64_fetch_add_acquire atomic64_fetch_add -#define atomic64_fetch_add_release atomic64_fetch_add - -#else /* atomic64_fetch_add_relaxed */ - -#ifndef atomic64_fetch_add_acquire -#define atomic64_fetch_add_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add_release -#define atomic64_fetch_add_release(...) \ - __atomic_op_release(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add -#define atomic64_fetch_add(...) \ - __atomic_op_fence(atomic64_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_add_relaxed */ - -/* atomic64_fetch_inc_relaxed */ -#ifndef atomic64_fetch_inc_relaxed - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(v) atomic64_fetch_add(1, (v)) -#define atomic64_fetch_inc_relaxed(v) atomic64_fetch_add_relaxed(1, (v)) -#define atomic64_fetch_inc_acquire(v) atomic64_fetch_add_acquire(1, (v)) -#define atomic64_fetch_inc_release(v) atomic64_fetch_add_release(1, (v)) -#else /* atomic64_fetch_inc */ -#define atomic64_fetch_inc_relaxed atomic64_fetch_inc -#define atomic64_fetch_inc_acquire atomic64_fetch_inc -#define atomic64_fetch_inc_release atomic64_fetch_inc -#endif /* atomic64_fetch_inc */ - -#else /* atomic64_fetch_inc_relaxed */ - -#ifndef atomic64_fetch_inc_acquire -#define atomic64_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc_release -#define atomic64_fetch_inc_release(...) \ - __atomic_op_release(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(...) \ - __atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_inc_relaxed */ - -/* atomic64_fetch_sub_relaxed */ -#ifndef atomic64_fetch_sub_relaxed -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub -#define atomic64_fetch_sub_acquire atomic64_fetch_sub -#define atomic64_fetch_sub_release atomic64_fetch_sub - -#else /* atomic64_fetch_sub_relaxed */ - -#ifndef atomic64_fetch_sub_acquire -#define atomic64_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub_release -#define atomic64_fetch_sub_release(...) \ - __atomic_op_release(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub -#define atomic64_fetch_sub(...) \ - __atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_sub_relaxed */ - -/* atomic64_fetch_dec_relaxed */ -#ifndef atomic64_fetch_dec_relaxed - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(v) atomic64_fetch_sub(1, (v)) -#define atomic64_fetch_dec_relaxed(v) atomic64_fetch_sub_relaxed(1, (v)) -#define atomic64_fetch_dec_acquire(v) atomic64_fetch_sub_acquire(1, (v)) -#define atomic64_fetch_dec_release(v) atomic64_fetch_sub_release(1, (v)) -#else /* atomic64_fetch_dec */ -#define atomic64_fetch_dec_relaxed atomic64_fetch_dec -#define atomic64_fetch_dec_acquire atomic64_fetch_dec -#define atomic64_fetch_dec_release atomic64_fetch_dec -#endif /* atomic64_fetch_dec */ - -#else /* atomic64_fetch_dec_relaxed */ - -#ifndef atomic64_fetch_dec_acquire -#define atomic64_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec_release -#define atomic64_fetch_dec_release(...) \ - __atomic_op_release(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(...) \ - __atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_dec_relaxed */ - -/* atomic64_fetch_or_relaxed */ -#ifndef atomic64_fetch_or_relaxed -#define atomic64_fetch_or_relaxed atomic64_fetch_or -#define atomic64_fetch_or_acquire atomic64_fetch_or -#define atomic64_fetch_or_release atomic64_fetch_or - -#else /* atomic64_fetch_or_relaxed */ - -#ifndef atomic64_fetch_or_acquire -#define atomic64_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or_release -#define atomic64_fetch_or_release(...) \ - __atomic_op_release(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or -#define atomic64_fetch_or(...) \ - __atomic_op_fence(atomic64_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_or_relaxed */ - -/* atomic64_fetch_and_relaxed */ -#ifndef atomic64_fetch_and_relaxed -#define atomic64_fetch_and_relaxed atomic64_fetch_and -#define atomic64_fetch_and_acquire atomic64_fetch_and -#define atomic64_fetch_and_release atomic64_fetch_and - -#else /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_fetch_and_acquire -#define atomic64_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and_release -#define atomic64_fetch_and_release(...) \ - __atomic_op_release(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and -#define atomic64_fetch_and(...) \ - __atomic_op_fence(atomic64_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_andnot -#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v)) -#endif - -#ifndef atomic64_fetch_andnot_relaxed - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v)) -#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v)) -#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v)) -#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v)) -#else /* atomic64_fetch_andnot */ -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot -#define atomic64_fetch_andnot_release atomic64_fetch_andnot -#endif /* atomic64_fetch_andnot */ - -#else /* atomic64_fetch_andnot_relaxed */ - -#ifndef atomic64_fetch_andnot_acquire -#define atomic64_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot_release -#define atomic64_fetch_andnot_release(...) \ - __atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(...) \ - __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_andnot_relaxed */ - -/* atomic64_fetch_xor_relaxed */ -#ifndef atomic64_fetch_xor_relaxed -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor -#define atomic64_fetch_xor_acquire atomic64_fetch_xor -#define atomic64_fetch_xor_release atomic64_fetch_xor - -#else /* atomic64_fetch_xor_relaxed */ - -#ifndef atomic64_fetch_xor_acquire -#define atomic64_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor_release -#define atomic64_fetch_xor_release(...) \ - __atomic_op_release(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor -#define atomic64_fetch_xor(...) \ - __atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_xor_relaxed */ - - -/* atomic64_xchg_relaxed */ -#ifndef atomic64_xchg_relaxed -#define atomic64_xchg_relaxed atomic64_xchg -#define atomic64_xchg_acquire atomic64_xchg -#define atomic64_xchg_release atomic64_xchg - -#else /* atomic64_xchg_relaxed */ - -#ifndef atomic64_xchg_acquire -#define atomic64_xchg_acquire(...) \ - __atomic_op_acquire(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg_release -#define atomic64_xchg_release(...) \ - __atomic_op_release(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg -#define atomic64_xchg(...) \ - __atomic_op_fence(atomic64_xchg, __VA_ARGS__) -#endif -#endif /* atomic64_xchg_relaxed */ - -/* atomic64_cmpxchg_relaxed */ -#ifndef atomic64_cmpxchg_relaxed -#define atomic64_cmpxchg_relaxed atomic64_cmpxchg -#define atomic64_cmpxchg_acquire atomic64_cmpxchg -#define atomic64_cmpxchg_release atomic64_cmpxchg - -#else /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_cmpxchg_acquire -#define atomic64_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg_release -#define atomic64_cmpxchg_release(...) \ - __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg -#define atomic64_cmpxchg(...) \ - __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_try_cmpxchg - -#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic64_try_cmpxchg */ -#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg -#endif /* atomic64_try_cmpxchg */ - -/** - * atomic64_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic64_fetch_add_unless -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic64_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) -{ - return atomic64_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic64_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic64_inc_not_zero -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -#endif - -/** - * atomic64_inc_and_test - increment and test - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_inc_and_test -static inline bool atomic64_inc_and_test(atomic64_t *v) -{ - return atomic64_inc_return(v) == 0; -} -#endif - -/** - * atomic64_dec_and_test - decrement and test - * @v: pointer of type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic64_dec_and_test -static inline bool atomic64_dec_and_test(atomic64_t *v) -{ - return atomic64_dec_return(v) == 0; -} -#endif - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_sub_and_test -static inline bool atomic64_sub_and_test(long long i, atomic64_t *v) -{ - return atomic64_sub_return(i, v) == 0; -} -#endif - -/** - * atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -#ifndef atomic64_add_negative -static inline bool atomic64_add_negative(long long i, atomic64_t *v) -{ - return atomic64_add_return(i, v) < 0; -} -#endif - -#ifndef atomic64_inc_unless_negative -static inline bool atomic64_inc_unless_negative(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic64_dec_unless_positive -static inline bool atomic64_dec_unless_positive(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic64 variable, v, was not decremented. - */ -#ifndef atomic64_dec_if_positive -static inline long long atomic64_dec_if_positive(atomic64_t *v) -{ - long long dec, c = atomic64_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#include #include -- cgit v1.2.3 From 8fc5c73554db0ac18c0c6ac5b2099ab917f83bdf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Nov 2018 12:43:07 -0800 Subject: acpi/nfit, device-dax: Identify differentiated memory with a unique numa-node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent memory, as described by the ACPI NFIT (NVDIMM Firmware Interface Table), is the first known instance of a memory range described by a unique "target" proximity domain. Where "initiator" and "target" proximity domains is an approach that the ACPI HMAT (Heterogeneous Memory Attributes Table) uses to described the unique performance properties of a memory range relative to a given initiator (e.g. CPU or DMA device). Currently the numa-node for a /dev/pmemX block-device or /dev/daxX.Y char-device follows the traditional notion of 'numa-node' where the attribute conveys the closest online numa-node. That numa-node attribute is useful for cpu-binding and memory-binding processes *near* the device. However, when the memory range backing a 'pmem', or 'dax' device is onlined (memory hot-add) the memory-only-numa-node representing that address needs to be differentiated from the set of online nodes. In other words, the numa-node association of the device depends on whether you can bind processes *near* the cpu-numa-node in the offline device-case, or bind process *on* the memory-range directly after the backing address range is onlined. Allow for the case that platform firmware describes persistent memory with a unique proximity domain, i.e. when it is distinct from the proximity of DRAM and CPUs that are on the same socket. Plumb the Linux numa-node translation of that proximity through the libnvdimm region device to namespaces that are in device-dax mode. With this in place the proposed kmem driver [1] can optionally discover a unique numa-node number for the address range as it transitions the memory from an offline state managed by a device-driver to an online memory range managed by the core-mm. [1]: https://lore.kernel.org/lkml/20181022201317.8558C1D8@viggo.jf.intel.com Reported-by: Fan Du Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Dave Hansen Cc: Jérôme Glisse Reviewed-by: Yang Shi Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 1 + drivers/acpi/nfit/core.c | 8 ++++++-- drivers/acpi/numa.c | 1 + drivers/dax/bus.c | 4 +++- drivers/dax/bus.h | 3 ++- drivers/dax/dax-private.h | 4 ++++ drivers/dax/pmem/core.c | 4 +++- drivers/nvdimm/e820.c | 1 + drivers/nvdimm/nd.h | 2 +- drivers/nvdimm/of_pmem.c | 1 + drivers/nvdimm/region_devs.c | 1 + include/linux/acpi.h | 5 +++++ include/linux/libnvdimm.h | 1 + 13 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 7d6457ab5d34..8806ac822627 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -236,6 +236,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; ndr_desc.numa_node = dev_to_node(&p->pdev->dev); + ndr_desc.target_node = ndr_desc.numa_node; ndr_desc.res = &p->res; ndr_desc.of_node = p->dn; ndr_desc.provider_data = p; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 011d3db19c80..475899974c70 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2869,11 +2869,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ndr_desc->res = &res; ndr_desc->provider_data = nfit_spa; ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; - if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) + if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) { ndr_desc->numa_node = acpi_map_pxm_to_online_node( spa->proximity_domain); - else + ndr_desc->target_node = acpi_map_pxm_to_node( + spa->proximity_domain); + } else { ndr_desc->numa_node = NUMA_NO_NODE; + ndr_desc->target_node = NUMA_NO_NODE; + } /* * Persistence domain bits are hierarchical, if diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 274699463b4f..b9d86babb13a 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm) return node; } +EXPORT_SYMBOL(acpi_map_pxm_to_node); /** * acpi_map_pxm_to_online_node - Map proximity ID to online node diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 568168500217..c620ad52d7e5 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -214,7 +214,7 @@ static void dax_region_unregister(void *region) } struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, unsigned int align, + struct resource *res, int target_node, unsigned int align, unsigned long pfn_flags) { struct dax_region *dax_region; @@ -244,6 +244,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, dax_region->id = region_id; dax_region->align = align; dax_region->dev = parent; + dax_region->target_node = target_node; if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { kfree(dax_region); return NULL; @@ -348,6 +349,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, dev_dax->dax_dev = dax_dev; dev_dax->region = dax_region; + dev_dax->target_node = dax_region->target_node; kref_get(&dax_region->kref); inode = dax_inode(dax_dev); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index ce977552ffb5..8619e3299943 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -10,7 +10,8 @@ struct dax_device; struct dax_region; void dax_region_put(struct dax_region *dax_region); struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, unsigned int align, unsigned long flags); + struct resource *res, int target_node, unsigned int align, + unsigned long flags); enum dev_dax_subsys { DEV_DAX_BUS, diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index a82ce48f5884..a45612148ca0 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -26,6 +26,7 @@ void dax_bus_exit(void); /** * struct dax_region - mapping infrastructure for dax devices * @id: kernel-wide unique region for a memory range + * @target_node: effective numa node if this memory range is onlined * @kref: to pin while other agents have a need to do lookups * @dev: parent device backing this region * @align: allocation and mapping alignment for child dax devices @@ -34,6 +35,7 @@ void dax_bus_exit(void); */ struct dax_region { int id; + int target_node; struct kref kref; struct device *dev; unsigned int align; @@ -46,6 +48,7 @@ struct dax_region { * data while the device is activated in the driver. * @region - parent region * @dax_dev - core dax functionality + * @target_node: effective numa node if dev_dax memory range is onlined * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) * @ref: pgmap reference count (driver owned) @@ -54,6 +57,7 @@ struct dax_region { struct dev_dax { struct dax_region *region; struct dax_device *dax_dev; + int target_node; struct device dev; struct dev_pagemap pgmap; struct percpu_ref ref; diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index bdcff1b14e95..f71019ce0647 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -20,6 +20,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct nd_region *nd_region = to_nd_region(dev->parent); ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -52,7 +53,8 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) memcpy(&res, &pgmap.res, sizeof(res)); res.start += offset; dax_region = alloc_dax_region(dev, region_id, &res, - le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP); + nd_region->target_node, le32_to_cpu(pfn_sb->align), + PFN_DEV|PFN_MAP); if (!dax_region) return ERR_PTR(-ENOMEM); diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index 521eaf53a52a..36be9b619187 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -47,6 +47,7 @@ static int e820_register_one(struct resource *res, void *data) ndr_desc.res = res; ndr_desc.attr_groups = e820_pmem_region_attribute_groups; ndr_desc.numa_node = e820_range_to_nid(res->start); + ndr_desc.target_node = ndr_desc.numa_node; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) return -ENXIO; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index cfde992684e7..0b3d7595b3cb 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -153,7 +153,7 @@ struct nd_region { u16 ndr_mappings; u64 ndr_size; u64 ndr_start; - int id, num_lanes, ro, numa_node; + int id, num_lanes, ro, numa_node, target_node; void *provider_data; struct kernfs_node *bb_state; struct badblocks bb; diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 0a701837dfc0..ecaaa27438e2 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -68,6 +68,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; ndr_desc.numa_node = dev_to_node(&pdev->dev); + ndr_desc.target_node = ndr_desc.numa_node; ndr_desc.res = &pdev->resource[i]; ndr_desc.of_node = np; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f292..caf2f3129ccd 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1065,6 +1065,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; + nd_region->target_node = ndr_desc->target_node; ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); ida_init(&nd_region->pfn_ida); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 87715f20b69a..eddf2736e5a6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -400,12 +400,17 @@ extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_online_node(int pxm); +int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); #else static inline int acpi_map_pxm_to_online_node(int pxm) { return 0; } +static inline int acpi_map_pxm_to_node(int pxm) +{ + return 0; +} static inline int acpi_get_node(acpi_handle handle) { return 0; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907..56bc545ad3b2 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -128,6 +128,7 @@ struct nd_region_desc { void *provider_data; int num_lanes; int numa_node; + int target_node; unsigned long flags; struct device_node *of_node; }; -- cgit v1.2.3 From ebc40be2b8eec093abbbd87658a6726ff84a61f5 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 7 Nov 2018 11:18:34 +0100 Subject: remoteproc: fix kernel-doc comment for parse_fw Fix the kernel-doc comment for "parse_fw" and fix a typo. Signed-off-by: Fabien Dessenne Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 507a2b524208..68e72f33c705 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -345,9 +345,9 @@ struct firmware; * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) * @da_to_va: optional platform hook to perform address translations - * @load_rsc_table: load resource table from firmware image + * @parse_fw: parse firmware to extract information (e.g. resource table) * @find_loaded_rsc_table: find the loaded resouce table - * @load: load firmeware to memory, where the remote processor + * @load: load firmware to memory, where the remote processor * expects to find it * @sanity_check: sanity check the fw image * @get_boot_addr: get boot address to entry point specified in firmware -- cgit v1.2.3 From d7dba6be0f31ae61f5f3296aa130f45d57d30f74 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:36 +0200 Subject: dmaengine: dw: Remove misleading is_private property The commit a9ddb575d6d6 ("dmaengine: dw_dmac: Enhance device tree support") introduces is_private property in uncertain understanding what does it mean. First of all, documentation defines DMA_PRIVATE capability as Documentation/crypto/async-tx-api.txt: The DMA_PRIVATE capability flag is used to tag dma devices that should not be used by the general-purpose allocator. It can be set at initialization time if it is known that a channel will always be private. Alternatively, it is set when dma_request_channel() finds an unused "public" channel. A couple caveats to note when implementing a driver and consumer: 1/ Once a channel has been privately allocated it will no longer be considered by the general-purpose allocator even after a call to dma_release_channel(). 2/ Since capabilities are specified at the device level a dma_device with multiple channels will either have all channels public, or all channels private. Documentation/driver-api/dmaengine/provider.rst: - DMA_PRIVATE The devices only supports slave transfers, and as such isn't available for async transfers. The capability had been introduced by the commit 59b5ec21446b ("dmaengine: introduce dma_request_channel and private channels") and some code didn't changed from that times ever. Taking into consideration above and the fact that on all known platforms Synopsys DesignWare DMA engine is attached to serve slave transfers, the DMA_PRIVATE capability must be enabled for this device unconditionally. Otherwise, as rightfully noticed in drivers/dma/at_xdmac.c: /* * Without DMA_PRIVATE the driver is not able to allocate more than * one channel, second allocation fails in private_candidate. */ because of of a caveats mentioned in above documentation excerpts. So, remove conditional around DMA_PRIVATE followed by removal leftovers. If someone wonders, DMA_PRIVATE can be not used if and only if the all channels of the DMA controller are supposed to serve memory-to-memory like operations. For example, EP93xx has two controllers, one of which can only perform memory-to-memory transfers Note, this change doesn't affect dmatest to be able to test such controllers. Cc: Greg Kroah-Hartman (maintainer:SERIAL DRIVERS) Cc: Dan Williams Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps-dma.txt | 2 -- drivers/dma/dw/core.c | 4 +--- drivers/dma/dw/pci.c | 1 - drivers/dma/dw/platform.c | 3 --- drivers/tty/serial/8250/8250_lpss.c | 1 - include/linux/platform_data/dma-dw.h | 3 --- 6 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index db757df7057d..0bedceed1963 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -23,8 +23,6 @@ Deprecated properties: Optional properties: -- is_private: The device channels should be marked as private and not for by the - general purpose DMA channel allocator. False if not passed. - multi-block: Multi block transfers supported by hardware. Array property with one cell per channel. 0: not supported, 1 (default): supported. - snps,dma-protection-control: AHB HPROT[3:1] protection setting. diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index dc053e62f894..e25503986680 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip) pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ - pdata->is_private = true; pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; @@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip) /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); - if (pdata->is_private) - dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); if (pdata->is_memcpy) dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 313ba10c6224..570498faadc3 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,7 +17,6 @@ static struct dw_dma_platform_data mrfld_pdata = { .nr_channels = 8, - .is_private = true, .is_memcpy = true, .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 31ff8113c3de..6dd8cd1820c1 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -128,9 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; - if (of_property_read_bool(np, "is_private")) - pdata->is_private = true; - /* * All known devices, which use DT for configuration, support * memory-to-memory transfers. So enable it by default. diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 98dbc796353f..53ca9ba6ab4b 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -153,7 +153,6 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) #ifdef CONFIG_SERIAL_8250_DMA static const struct dw_dma_platform_data qrk_serial_dma_pdata = { .nr_channels = 2, - .is_private = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, .block_size = 4095, diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 1a1d58ebffbf..d443025c5c72 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,8 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. * @is_memcpy: The device channels do support memory-to-memory transfers. * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 @@ -53,7 +51,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_private; bool is_memcpy; bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -- cgit v1.2.3 From 078165779608873e7b6eae1316a39c73af9f3edc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:37 +0200 Subject: dmaengine: dw: Remove unused internal property All known devices, which use DT for configuration, support memory-to-memory transfers. So enable it by default. The rest two cases, i.e. Intel Quark and PPC460ex, instantiate DMA driver and use its channels exclusively for hardware, which means there is no available channel for any other purposes anyway. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 4 +--- drivers/dma/dw/pci.c | 1 - drivers/dma/dw/platform.c | 6 ------ include/linux/platform_data/dma-dw.h | 2 -- 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index e25503986680..4982e443869c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip) pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ - pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { @@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip) /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); - if (pdata->is_memcpy) - dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); dw->dma.dev = chip->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 570498faadc3..66d98d7ccad0 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,7 +17,6 @@ static struct dw_dma_platform_data mrfld_pdata = { .nr_channels = 8, - .is_memcpy = true, .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 6dd8cd1820c1..58fc1ba02a1e 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -128,12 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; - /* - * All known devices, which use DT for configuration, support - * memory-to-memory transfers. So enable it by default. - */ - pdata->is_memcpy = true; - if (!of_property_read_u32(np, "chan_allocation_order", &tmp)) pdata->chan_allocation_order = (unsigned char)tmp; diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index d443025c5c72..1c85eeee4171 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,7 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_memcpy: The device channels do support memory-to-memory transfers. * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. @@ -51,7 +50,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_memcpy; bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ -- cgit v1.2.3 From 69da8be90d5e85e60b5377c47384154b9dabf592 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:38 +0200 Subject: dmaengine: dw: Split DW and iDMA 32-bit operations Here is a kinda big refactoring that should have been done in the first place, when Intel iDMA 32-bit support appeared. It splits operations which are different to Synopsys DesignWare and Intel iDMA 32-bit controllers. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/Makefile | 2 +- drivers/dma/dw/core.c | 190 +++++++---------------------------- drivers/dma/dw/dw.c | 112 +++++++++++++++++++++ drivers/dma/dw/idma32.c | 138 +++++++++++++++++++++++++ drivers/dma/dw/internal.h | 10 +- drivers/dma/dw/pci.c | 45 ++++++--- drivers/dma/dw/platform.c | 8 +- drivers/dma/dw/regs.h | 13 +++ include/linux/dma/dw.h | 4 + include/linux/platform_data/dma-dw.h | 2 - 10 files changed, 343 insertions(+), 181 deletions(-) create mode 100644 drivers/dma/dw/dw.c create mode 100644 drivers/dma/dw/idma32.c (limited to 'include/linux') diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile index 2b949c2e4504..63ed895c09aa 100644 --- a/drivers/dma/dw/Makefile +++ b/drivers/dma/dw/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o -dw_dmac_core-objs := core.o +dw_dmac_core-objs := core.o dw.o idma32.o obj-$(CONFIG_DW_DMAC) += dw_dmac.o dw_dmac-objs := platform.o diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 4982e443869c..8a581d86ea8d 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -138,44 +138,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) dwc->descs_allocated--; } -static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc) -{ - u32 cfghi = 0; - u32 cfglo = 0; - - /* Set default burst alignment */ - cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; - - /* Low 4 bits of the request lines */ - cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf); - cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf); - - /* Request line extension (2 bits) */ - cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3); - cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3); - - channel_writel(dwc, CFG_LO, cfglo); - channel_writel(dwc, CFG_HI, cfghi); -} - -static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - u32 cfghi = DWC_CFGH_FIFO_MODE; - u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); - bool hs_polarity = dwc->dws.hs_polarity; - - cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); - cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); - - /* Set polarity of handshake interface */ - cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; - - channel_writel(dwc, CFG_LO, cfglo); - channel_writel(dwc, CFG_HI, cfghi); -} - static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); @@ -183,10 +145,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) return; - if (dw->pdata->is_idma32) - dwc_initialize_chan_idma32(dwc); - else - dwc_initialize_chan_dw(dwc); + dw->initialize_chan(dwc); /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); @@ -215,37 +174,6 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc) cpu_relax(); } -static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes, - unsigned int width, size_t *len) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - u32 block; - - /* Always in bytes for iDMA 32-bit */ - if (dw->pdata->is_idma32) - width = 0; - - if ((bytes >> width) > dwc->block_size) { - block = dwc->block_size; - *len = block << width; - } else { - block = bytes >> width; - *len = bytes; - } - - return block; -} - -static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) -{ - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - - if (dw->pdata->is_idma32) - return IDMA32C_CTLH_BLOCK_TS(block); - - return DWC_CTLH_BLOCK_TS(block) << width; -} - /*----------------------------------------------------------------------*/ /* Perform single block transfer */ @@ -391,10 +319,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) /* Returns how many bytes were already received from source */ static inline u32 dwc_get_sent(struct dw_dma_chan *dwc) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); u32 ctlhi = channel_readl(dwc, CTL_HI); u32 ctllo = channel_readl(dwc, CTL_LO); - return block2bytes(dwc, ctlhi, ctllo >> 4 & 7); + return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7); } static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -651,7 +580,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, unsigned int src_width; unsigned int dst_width; unsigned int data_width = dw->pdata->data_width[m_master]; - u32 ctllo; + u32 ctllo, ctlhi; u8 lms = DWC_LLP_LMS(m_master); dev_vdbg(chan2dev(chan), @@ -680,10 +609,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count); + lli_write(desc, sar, src + offset); lli_write(desc, dar, dest + offset); lli_write(desc, ctllo, ctllo); - lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count)); + lli_write(desc, ctlhi, ctlhi); desc->len = xfer_count; if (!first) { @@ -721,7 +652,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dma_slave_config *sconfig = &dwc->dma_sconfig; struct dw_desc *prev; struct dw_desc *first; - u32 ctllo; + u32 ctllo, ctlhi; u8 m_master = dwc->dws.m_master; u8 lms = DWC_LLP_LMS(m_master); dma_addr_t reg; @@ -768,9 +699,11 @@ slave_sg_todev_fill_desc: if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen); + lli_write(desc, sar, mem); lli_write(desc, dar, reg); - lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen)); + lli_write(desc, ctlhi, ctlhi); lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); desc->len = dlen; @@ -814,9 +747,11 @@ slave_sg_fromdev_fill_desc: if (!desc) goto err_desc_get; + ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen); + lli_write(desc, sar, reg); lli_write(desc, dar, mem); - lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen)); + lli_write(desc, ctlhi, ctlhi); mem_width = __ffs(data_width | mem | dlen); lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); desc->len = dlen; @@ -876,22 +811,12 @@ EXPORT_SYMBOL_GPL(dw_dma_filter); static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dma_slave_config *sc = &dwc->dma_sconfig; struct dw_dma *dw = to_dw_dma(chan->device); - /* - * Fix sconfig's burst size according to dw_dmac. We need to convert - * them as: - * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. - * - * NOTE: burst size 2 is not supported by DesignWare controller. - * iDMA 32-bit supports it. - */ - u32 s = dw->pdata->is_idma32 ? 1 : 2; memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); - sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0; - sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0; + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); + dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); return 0; } @@ -900,16 +825,9 @@ static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); unsigned int count = 20; /* timeout iterations */ - u32 cfglo; - cfglo = channel_readl(dwc, CFG_LO); - if (dw->pdata->is_idma32) { - if (drain) - cfglo |= IDMA32C_CFGL_CH_DRAIN; - else - cfglo &= ~IDMA32C_CFGL_CH_DRAIN; - } - channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); + dw->suspend_chan(dwc, drain); + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) udelay(2); @@ -1058,33 +976,7 @@ static void dwc_issue_pending(struct dma_chan *chan) /*----------------------------------------------------------------------*/ -/* - * Program FIFO size of channels. - * - * By default full FIFO (512 bytes) is assigned to channel 0. Here we - * slice FIFO on equal parts between channels. - */ -static void idma32_fifo_partition(struct dw_dma *dw) -{ - u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | - IDMA32C_FP_UPDATE; - u64 fifo_partition = 0; - - if (!dw->pdata->is_idma32) - return; - - /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */ - fifo_partition |= value << 0; - - /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ - fifo_partition |= value << 32; - - /* Program FIFO Partition registers - 64 bytes per channel */ - idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); - idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); -} - -static void dw_dma_off(struct dw_dma *dw) +void do_dw_dma_off(struct dw_dma *dw) { unsigned int i; @@ -1103,7 +995,7 @@ static void dw_dma_off(struct dw_dma *dw) clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); } -static void dw_dma_on(struct dw_dma *dw) +void do_dw_dma_on(struct dw_dma *dw) { dma_writel(dw, CFG, DW_CFG_DMA_EN); } @@ -1139,7 +1031,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) /* Enable controller here if needed */ if (!dw->in_use) - dw_dma_on(dw); + do_dw_dma_on(dw); dw->in_use |= dwc->mask; return 0; @@ -1177,30 +1069,25 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable controller in case it was a last user */ dw->in_use &= ~dwc->mask; if (!dw->in_use) - dw_dma_off(dw); + do_dw_dma_off(dw); dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -int dw_dma_probe(struct dw_dma_chip *chip) +int do_dma_probe(struct dw_dma_chip *chip) { + struct dw_dma *dw = chip->dw; struct dw_dma_platform_data *pdata; - struct dw_dma *dw; bool autocfg = false; unsigned int dw_params; unsigned int i; int err; - dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); - if (!dw) - return -ENOMEM; - dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); if (!dw->pdata) return -ENOMEM; dw->regs = chip->regs; - chip->dw = dw; pm_runtime_get_sync(chip->dev); @@ -1250,15 +1137,10 @@ int dw_dma_probe(struct dw_dma_chip *chip) dw->all_chan_mask = (1 << pdata->nr_channels) - 1; /* Force dma off, just in case */ - dw_dma_off(dw); - - idma32_fifo_partition(dw); + dw->disable(dw); /* Device and instance ID for IRQ and DMA pool */ - if (pdata->is_idma32) - snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id); - else - snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id); + dw->set_device_name(dw, chip->id); /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create(dw->name, chip->dev, @@ -1380,16 +1262,15 @@ err_pdata: pm_runtime_put_sync_suspend(chip->dev); return err; } -EXPORT_SYMBOL_GPL(dw_dma_probe); -int dw_dma_remove(struct dw_dma_chip *chip) +int do_dma_remove(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; struct dw_dma_chan *dwc, *_dwc; pm_runtime_get_sync(chip->dev); - dw_dma_off(dw); + do_dw_dma_off(dw); dma_async_device_unregister(&dw->dma); free_irq(chip->irq, dw); @@ -1404,27 +1285,24 @@ int dw_dma_remove(struct dw_dma_chip *chip) pm_runtime_put_sync_suspend(chip->dev); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_remove); -int dw_dma_disable(struct dw_dma_chip *chip) +int do_dw_dma_disable(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; - dw_dma_off(dw); + dw->disable(dw); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_disable); +EXPORT_SYMBOL_GPL(do_dw_dma_disable); -int dw_dma_enable(struct dw_dma_chip *chip) +int do_dw_dma_enable(struct dw_dma_chip *chip) { struct dw_dma *dw = chip->dw; - idma32_fifo_partition(dw); - - dw_dma_on(dw); + dw->enable(dw); return 0; } -EXPORT_SYMBOL_GPL(dw_dma_enable); +EXPORT_SYMBOL_GPL(do_dw_dma_enable); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c new file mode 100644 index 000000000000..977aa28bf81d --- /dev/null +++ b/drivers/dma/dw/dw.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2007-2008 Atmel Corporation +// Copyright (C) 2010-2011 ST Microelectronics +// Copyright (C) 2013,2018 Intel Corporation + +#include +#include +#include +#include + +#include "internal.h" + +static void dw_dma_initialize_chan(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u32 cfghi = DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + bool hs_polarity = dwc->dws.hs_polarity; + + cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); + + /* Set polarity of handshake interface */ + cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if ((bytes >> width) > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size << width; + } else { + block = bytes >> width; + *len = bytes; + } + + return block; +} + +static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return DWC_CTLH_BLOCK_TS(block) << width; +} + +static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + /* + * Fix burst size according to dw_dmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. + */ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; +} + +static void dw_dma_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id); +} + +static void dw_dma_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); +} + +static void dw_dma_enable(struct dw_dma *dw) +{ + do_dw_dma_on(dw); +} + +int dw_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + dw->initialize_chan = dw_dma_initialize_chan; + dw->suspend_chan = dw_dma_suspend_chan; + dw->encode_maxburst = dw_dma_encode_maxburst; + dw->bytes2block = dw_dma_bytes2block; + dw->block2bytes = dw_dma_block2bytes; + + /* Device operations */ + dw->set_device_name = dw_dma_set_device_name; + dw->disable = dw_dma_disable; + dw->enable = dw_dma_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_probe); + +int dw_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(dw_dma_remove); diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c new file mode 100644 index 000000000000..8707830f39ad --- /dev/null +++ b/drivers/dma/dw/idma32.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2013,2018 Intel Corporation + +#include +#include +#include +#include + +#include "internal.h" + +static void idma32_initialize_chan(struct dw_dma_chan *dwc) +{ + u32 cfghi = 0; + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN; + + /* Low 4 bits of the request lines */ + cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf); + cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf); + + /* Request line extension (2 bits) */ + cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3); + cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3); + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); +} + +static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain) +{ + u32 cfglo = channel_readl(dwc, CFG_LO); + + if (drain) + cfglo |= IDMA32C_CFGL_CH_DRAIN; + else + cfglo &= ~IDMA32C_CFGL_CH_DRAIN; + + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); +} + +static u32 idma32_bytes2block(struct dw_dma_chan *dwc, + size_t bytes, unsigned int width, size_t *len) +{ + u32 block; + + if (bytes > dwc->block_size) { + block = dwc->block_size; + *len = dwc->block_size; + } else { + block = bytes; + *len = bytes; + } + + return block; +} + +static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) +{ + return IDMA32C_CTLH_BLOCK_TS(block); +} + +static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; +} + +static void idma32_set_device_name(struct dw_dma *dw, int id) +{ + snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id); +} + +/* + * Program FIFO size of channels. + * + * By default full FIFO (512 bytes) is assigned to channel 0. Here we + * slice FIFO on equal parts between channels. + */ +static void idma32_fifo_partition(struct dw_dma *dw) +{ + u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | + IDMA32C_FP_UPDATE; + u64 fifo_partition = 0; + + /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */ + fifo_partition |= value << 0; + + /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ + fifo_partition |= value << 32; + + /* Program FIFO Partition registers - 64 bytes per channel */ + idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); + idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); +} + +static void idma32_disable(struct dw_dma *dw) +{ + do_dw_dma_off(dw); + idma32_fifo_partition(dw); +} + +static void idma32_enable(struct dw_dma *dw) +{ + idma32_fifo_partition(dw); + do_dw_dma_on(dw); +} + +int idma32_dma_probe(struct dw_dma_chip *chip) +{ + struct dw_dma *dw; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Channel operations */ + dw->initialize_chan = idma32_initialize_chan; + dw->suspend_chan = idma32_suspend_chan; + dw->encode_maxburst = idma32_encode_maxburst; + dw->bytes2block = idma32_bytes2block; + dw->block2bytes = idma32_block2bytes; + + /* Device operations */ + dw->set_device_name = idma32_set_device_name; + dw->disable = idma32_disable; + dw->enable = idma32_enable; + + chip->dw = dw; + return do_dma_probe(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_probe); + +int idma32_dma_remove(struct dw_dma_chip *chip) +{ + return do_dma_remove(chip); +} +EXPORT_SYMBOL_GPL(idma32_dma_remove); diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 41439732ff6b..fdcac21ea665 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -15,8 +15,14 @@ #include "regs.h" -int dw_dma_disable(struct dw_dma_chip *chip); -int dw_dma_enable(struct dw_dma_chip *chip); +int do_dma_probe(struct dw_dma_chip *chip); +int do_dma_remove(struct dw_dma_chip *chip); + +void do_dw_dma_on(struct dw_dma *dw); +void do_dw_dma_off(struct dw_dma *dw); + +int do_dw_dma_disable(struct dw_dma_chip *chip); +int do_dw_dma_enable(struct dw_dma_chip *chip); extern bool dw_dma_filter(struct dma_chan *chan, void *param); diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 66d98d7ccad0..e9ba25b4f950 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -15,9 +15,17 @@ #include "internal.h" -static struct dw_dma_platform_data mrfld_pdata = { +struct dw_dma_pci_data { + const struct dw_dma_platform_data *pdata; + int (*probe)(struct dw_dma_chip *chip); +}; + +static const struct dw_dma_pci_data dw_pci_data = { + .probe = dw_dma_probe, +}; + +static const struct dw_dma_platform_data idma32_pdata = { .nr_channels = 8, - .is_idma32 = true, .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, .chan_priority = CHAN_PRIORITY_ASCENDING, .block_size = 131071, @@ -26,9 +34,14 @@ static struct dw_dma_platform_data mrfld_pdata = { .multi_block = {1, 1, 1, 1, 1, 1, 1, 1}, }; +static const struct dw_dma_pci_data idma32_pci_data = { + .pdata = &idma32_pdata, + .probe = idma32_dma_probe, +}; + static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { - const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + const struct dw_dma_pci_data *data = (void *)pid->driver_data; struct dw_dma_chip *chip; int ret; @@ -61,9 +74,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) chip->id = pdev->devfn; chip->regs = pcim_iomap_table(pdev)[0]; chip->irq = pdev->irq; - chip->pdata = pdata; + chip->pdata = data->pdata; - ret = dw_dma_probe(chip); + ret = data->probe(chip); if (ret) return ret; @@ -89,7 +102,7 @@ static int dw_pci_suspend_late(struct device *dev) struct pci_dev *pci = to_pci_dev(dev); struct dw_dma_chip *chip = pci_get_drvdata(pci); - return dw_dma_disable(chip); + return do_dw_dma_disable(chip); }; static int dw_pci_resume_early(struct device *dev) @@ -97,7 +110,7 @@ static int dw_pci_resume_early(struct device *dev) struct pci_dev *pci = to_pci_dev(dev); struct dw_dma_chip *chip = pci_get_drvdata(pci); - return dw_dma_enable(chip); + return do_dw_dma_enable(chip); }; #endif /* CONFIG_PM_SLEEP */ @@ -108,24 +121,24 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = { static const struct pci_device_id dw_pci_id_table[] = { /* Medfield (GPDMA) */ - { PCI_VDEVICE(INTEL, 0x0827) }, + { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_data }, /* BayTrail */ - { PCI_VDEVICE(INTEL, 0x0f06) }, - { PCI_VDEVICE(INTEL, 0x0f40) }, + { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_data }, + { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_data }, - /* Merrifield iDMA 32-bit (GPDMA) */ - { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata }, + /* Merrifield */ + { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_pci_data }, /* Braswell */ - { PCI_VDEVICE(INTEL, 0x2286) }, - { PCI_VDEVICE(INTEL, 0x22c0) }, + { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_data }, + { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_data }, /* Haswell */ - { PCI_VDEVICE(INTEL, 0x9c60) }, + { PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_pci_data }, /* Broadwell */ - { PCI_VDEVICE(INTEL, 0x9ce0) }, + { PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_pci_data }, { } }; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 58fc1ba02a1e..d5196c97e4f4 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -255,7 +255,7 @@ static void dw_shutdown(struct platform_device *pdev) struct dw_dma_chip *chip = platform_get_drvdata(pdev); /* - * We have to call dw_dma_disable() to stop any ongoing transfer. On + * We have to call do_dw_dma_disable() to stop any ongoing transfer. On * some platforms we can't do that since DMA device is powered off. * Moreover we have no possibility to check if the platform is affected * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put() @@ -264,7 +264,7 @@ static void dw_shutdown(struct platform_device *pdev) * used by the driver. */ pm_runtime_get_sync(chip->dev); - dw_dma_disable(chip); + do_dw_dma_disable(chip); pm_runtime_put_sync_suspend(chip->dev); clk_disable_unprepare(chip->clk); @@ -294,7 +294,7 @@ static int dw_suspend_late(struct device *dev) { struct dw_dma_chip *chip = dev_get_drvdata(dev); - dw_dma_disable(chip); + do_dw_dma_disable(chip); clk_disable_unprepare(chip->clk); return 0; @@ -309,7 +309,7 @@ static int dw_resume_early(struct device *dev) if (ret) return ret; - return dw_dma_enable(chip); + return do_dw_dma_enable(chip); } #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 646c9c960c07..66aa8b227248 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -312,6 +312,19 @@ struct dw_dma { u8 all_chan_mask; u8 in_use; + /* Channel operations */ + void (*initialize_chan)(struct dw_dma_chan *dwc); + void (*suspend_chan)(struct dw_dma_chan *dwc, bool drain); + void (*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst); + u32 (*bytes2block)(struct dw_dma_chan *dwc, size_t bytes, + unsigned int width, size_t *len); + size_t (*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width); + + /* Device operations */ + void (*set_device_name)(struct dw_dma *dw, int id); + void (*disable)(struct dw_dma *dw); + void (*enable)(struct dw_dma *dw); + /* platform data */ struct dw_dma_platform_data *pdata; }; diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index e166cac8e870..d643d331c20e 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -45,9 +45,13 @@ struct dw_dma_chip { #if IS_ENABLED(CONFIG_DW_DMAC_CORE) int dw_dma_probe(struct dw_dma_chip *chip); int dw_dma_remove(struct dw_dma_chip *chip); +int idma32_dma_probe(struct dw_dma_chip *chip); +int idma32_dma_remove(struct dw_dma_chip *chip); #else static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; } static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; } +static inline int idma32_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; } +static inline int idma32_dma_remove(struct dw_dma_chip *chip) { return 0; } #endif /* CONFIG_DW_DMAC_CORE */ #endif /* _DMA_DW_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 1c85eeee4171..576048433809 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -38,7 +38,6 @@ struct dw_dma_slave { /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) - * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -50,7 +49,6 @@ struct dw_dma_slave { */ struct dw_dma_platform_data { unsigned int nr_channels; - bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit v1.2.3 From b466a37fbcc99ef79ea59e40ef6aa8391430b0d8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Jan 2019 13:07:41 +0200 Subject: dmaengine: dw: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/Kconfig | 2 ++ drivers/dma/dw/core.c | 5 +---- drivers/dma/dw/internal.h | 5 +---- drivers/dma/dw/pci.c | 5 +---- drivers/dma/dw/platform.c | 5 +---- drivers/dma/dw/regs.h | 5 +---- include/linux/dma/dw.h | 5 +---- include/linux/platform_data/dma-dw.h | 5 +---- 8 files changed, 9 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index 04b9728c1d26..e5162690de8f 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + # # DMA engine configuration for dw # diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index b7e4dab28f8a..3a1ab52ffae0 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Core driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007-2008 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index fdcac21ea665..1dd7a4e6dd23 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _DMA_DW_INTERNAL_H diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index e9ba25b4f950..e79a75db0852 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * PCI driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2013 Intel Corporation * Author: Andy Shevchenko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index d5196c97e4f4..382dfd9e9600 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Platform driver for the Synopsys DesignWare DMA Controller * @@ -6,10 +7,6 @@ * Copyright (C) 2013 Intel Corporation * * Some parts of this driver are derived from the original dw_dmac. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 07f91325e559..3fce66ecee7a 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare AHB DMA Controller * * Copyright (C) 2005-2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2016 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index d643d331c20e..9752f3745f76 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics * Copyright (C) 2014 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _DMA_DW_H #define _DMA_DW_H diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 576048433809..f3eaf9ec00a1 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _PLATFORM_DATA_DMA_DW_H #define _PLATFORM_DATA_DMA_DW_H -- cgit v1.2.3 From d0dcde6426ce071ad447fb9d91c85ab649026114 Mon Sep 17 00:00:00 2001 From: Otto Sabart Date: Sun, 6 Jan 2019 00:29:15 +0100 Subject: doc: networking: convert offload files into RST and update references This patch renames offload files. This is necessary for Sphinx. Also update reference to checksum-offloads.rst file. Whole kernel code was grepped for references using: $ grep -r "\(segmentation\|checksum\)-offloads.txt" . There should be no other references to {segmentation,checksum}-offloads.txt files. Signed-off-by: Otto Sabart Acked-by: David S. Miller Signed-off-by: Jonathan Corbet --- Documentation/networking/checksum-offloads.rst | 143 ++++++++++++++++ Documentation/networking/checksum-offloads.txt | 143 ---------------- Documentation/networking/segmentation-offloads.rst | 184 +++++++++++++++++++++ Documentation/networking/segmentation-offloads.txt | 184 --------------------- include/linux/skbuff.h | 2 +- 5 files changed, 328 insertions(+), 328 deletions(-) create mode 100644 Documentation/networking/checksum-offloads.rst delete mode 100644 Documentation/networking/checksum-offloads.txt create mode 100644 Documentation/networking/segmentation-offloads.rst delete mode 100644 Documentation/networking/segmentation-offloads.txt (limited to 'include/linux') diff --git a/Documentation/networking/checksum-offloads.rst b/Documentation/networking/checksum-offloads.rst new file mode 100644 index 000000000000..1a1cd94a3f6d --- /dev/null +++ b/Documentation/networking/checksum-offloads.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +Checksum Offloads in the Linux Networking Stack +=============================================== + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack to +take advantage of checksum offload capabilities of various NICs. + +The following technologies are described: + +* TX Checksum Offload +* LCO: Local Checksum Offload +* RCO: Remote Checksum Offload + +Things that should be documented here but aren't yet: + +* RX Checksum Offload +* CHECKSUM_UNNECESSARY conversion + + +TX Checksum Offload +=================== + +The interface for offloading a transmit checksum to a device is explained in +detail in comments near the top of include/linux/skbuff.h. + +In brief, it allows to request the device fill in a single ones-complement +checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset. +The device should compute the 16-bit ones-complement checksum (i.e. the +'IP-style' checksum) from csum_start to the end of the packet, and fill in the +result at (csum_start + csum_offset). + +Because csum_offset cannot be negative, this ensures that the previous value of +the checksum field is included in the checksum computation, thus it can be used +to supply any needed corrections to the checksum (such as the sum of the +pseudo-header for UDP or TCP). + +This interface only allows a single checksum to be offloaded. Where +encapsulation is used, the packet may have multiple checksum fields in +different header layers, and the rest will have to be handled by another +mechanism such as LCO or RCO. + +CRC32c can also be offloaded using this interface, by means of filling +skb->csum_start and skb->csum_offset as described above, and setting +skb->csum_not_inet: see skbuff.h comment (section 'D') for more details. + +No offloading of the IP header checksum is performed; it is always done in +software. This is OK because when we build the IP header, we obviously have it +in cache, so summing it isn't expensive. It's also rather short. + +The requirements for GSO are more complicated, because when segmenting an +encapsulated packet both the inner and outer checksums may need to be edited or +recomputed for each resulting segment. See the skbuff.h comment (section 'E') +for more details. + +A driver declares its offload capabilities in netdev->hw_features; see +Documentation/networking/netdev-features.txt for more. Note that a device +which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and +csum_offset given in the SKB; if it tries to deduce these itself in hardware +(as some NICs do) the driver should check that the values in the SKB match +those which the hardware will deduce, and if not, fall back to checksumming in +software instead (with skb_csum_hwoffload_help() or one of the +skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in +include/linux/skbuff.h). + +The stack should, for the most part, assume that checksum offload is supported +by the underlying device. The only place that should check is +validate_xmit_skb(), and the functions it calls directly or indirectly. That +function compares the offload features requested by the SKB (which may include +other offloads besides TX Checksum Offload) and, if they are not supported or +enabled on the device (determined by netdev->features), performs the +corresponding offload in software. In the case of TX Checksum Offload, that +means calling skb_csum_hwoffload_help(skb, features). + + +LCO: Local Checksum Offload +=========================== + +LCO is a technique for efficiently computing the outer checksum of an +encapsulated datagram when the inner checksum is due to be offloaded. + +The ones-complement sum of a correctly checksummed TCP or UDP packet is equal +to the complement of the sum of the pseudo header, because everything else gets +'cancelled out' by the checksum field. This is because the sum was +complemented before being written to the checksum field. + +More generally, this holds in any case where the 'IP-style' ones complement +checksum is used, and thus any checksum that TX Checksum Offload supports. + +That is, if we have set up TX Checksum Offload with a start/offset pair, we +know that after the device has filled in that checksum, the ones complement sum +from csum_start to the end of the packet will be equal to the complement of +whatever value we put in the checksum field beforehand. This allows us to +compute the outer checksum without looking at the payload: we simply stop +summing when we get to csum_start, then add the complement of the 16-bit word +at (csum_start + csum_offset). + +Then, when the true inner checksum is filled in (either by hardware or by +skb_checksum_help()), the outer checksum will become correct by virtue of the +arithmetic. + +LCO is performed by the stack when constructing an outer UDP header for an +encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for the +IPv6 equivalents, in udp6_set_csum(). + +It is also performed when constructing an IPv4 GRE header, in +net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when +constructing an IPv6 GRE header; the GRE checksum is computed over the whole +packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use +LCO here as IPv6 GRE still uses an IP-style checksum. + +All of the LCO implementations use a helper function lco_csum(), in +include/linux/skbuff.h. + +LCO can safely be used for nested encapsulations; in this case, the outer +encapsulation layer will sum over both its own header and the 'middle' header. +This does mean that the 'middle' header will get summed multiple times, but +there doesn't seem to be a way to avoid that without incurring bigger costs +(e.g. in SKB bloat). + + +RCO: Remote Checksum Offload +============================ + +RCO is a technique for eliding the inner checksum of an encapsulated datagram, +allowing the outer checksum to be offloaded. It does, however, involve a +change to the encapsulation protocols, which the receiver must also support. +For this reason, it is disabled by default. + +RCO is detailed in the following Internet-Drafts: + +* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 +* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 + +In Linux, RCO is implemented individually in each encapsulation protocol, and +most tunnel types have flags controlling its use. For instance, VXLAN has the +flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be +used when transmitting to a given remote destination. diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt deleted file mode 100644 index 1a1cd94a3f6d..000000000000 --- a/Documentation/networking/checksum-offloads.txt +++ /dev/null @@ -1,143 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=============================================== -Checksum Offloads in the Linux Networking Stack -=============================================== - - -Introduction -============ - -This document describes a set of techniques in the Linux networking stack to -take advantage of checksum offload capabilities of various NICs. - -The following technologies are described: - -* TX Checksum Offload -* LCO: Local Checksum Offload -* RCO: Remote Checksum Offload - -Things that should be documented here but aren't yet: - -* RX Checksum Offload -* CHECKSUM_UNNECESSARY conversion - - -TX Checksum Offload -=================== - -The interface for offloading a transmit checksum to a device is explained in -detail in comments near the top of include/linux/skbuff.h. - -In brief, it allows to request the device fill in a single ones-complement -checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset. -The device should compute the 16-bit ones-complement checksum (i.e. the -'IP-style' checksum) from csum_start to the end of the packet, and fill in the -result at (csum_start + csum_offset). - -Because csum_offset cannot be negative, this ensures that the previous value of -the checksum field is included in the checksum computation, thus it can be used -to supply any needed corrections to the checksum (such as the sum of the -pseudo-header for UDP or TCP). - -This interface only allows a single checksum to be offloaded. Where -encapsulation is used, the packet may have multiple checksum fields in -different header layers, and the rest will have to be handled by another -mechanism such as LCO or RCO. - -CRC32c can also be offloaded using this interface, by means of filling -skb->csum_start and skb->csum_offset as described above, and setting -skb->csum_not_inet: see skbuff.h comment (section 'D') for more details. - -No offloading of the IP header checksum is performed; it is always done in -software. This is OK because when we build the IP header, we obviously have it -in cache, so summing it isn't expensive. It's also rather short. - -The requirements for GSO are more complicated, because when segmenting an -encapsulated packet both the inner and outer checksums may need to be edited or -recomputed for each resulting segment. See the skbuff.h comment (section 'E') -for more details. - -A driver declares its offload capabilities in netdev->hw_features; see -Documentation/networking/netdev-features.txt for more. Note that a device -which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and -csum_offset given in the SKB; if it tries to deduce these itself in hardware -(as some NICs do) the driver should check that the values in the SKB match -those which the hardware will deduce, and if not, fall back to checksumming in -software instead (with skb_csum_hwoffload_help() or one of the -skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in -include/linux/skbuff.h). - -The stack should, for the most part, assume that checksum offload is supported -by the underlying device. The only place that should check is -validate_xmit_skb(), and the functions it calls directly or indirectly. That -function compares the offload features requested by the SKB (which may include -other offloads besides TX Checksum Offload) and, if they are not supported or -enabled on the device (determined by netdev->features), performs the -corresponding offload in software. In the case of TX Checksum Offload, that -means calling skb_csum_hwoffload_help(skb, features). - - -LCO: Local Checksum Offload -=========================== - -LCO is a technique for efficiently computing the outer checksum of an -encapsulated datagram when the inner checksum is due to be offloaded. - -The ones-complement sum of a correctly checksummed TCP or UDP packet is equal -to the complement of the sum of the pseudo header, because everything else gets -'cancelled out' by the checksum field. This is because the sum was -complemented before being written to the checksum field. - -More generally, this holds in any case where the 'IP-style' ones complement -checksum is used, and thus any checksum that TX Checksum Offload supports. - -That is, if we have set up TX Checksum Offload with a start/offset pair, we -know that after the device has filled in that checksum, the ones complement sum -from csum_start to the end of the packet will be equal to the complement of -whatever value we put in the checksum field beforehand. This allows us to -compute the outer checksum without looking at the payload: we simply stop -summing when we get to csum_start, then add the complement of the 16-bit word -at (csum_start + csum_offset). - -Then, when the true inner checksum is filled in (either by hardware or by -skb_checksum_help()), the outer checksum will become correct by virtue of the -arithmetic. - -LCO is performed by the stack when constructing an outer UDP header for an -encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for the -IPv6 equivalents, in udp6_set_csum(). - -It is also performed when constructing an IPv4 GRE header, in -net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when -constructing an IPv6 GRE header; the GRE checksum is computed over the whole -packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use -LCO here as IPv6 GRE still uses an IP-style checksum. - -All of the LCO implementations use a helper function lco_csum(), in -include/linux/skbuff.h. - -LCO can safely be used for nested encapsulations; in this case, the outer -encapsulation layer will sum over both its own header and the 'middle' header. -This does mean that the 'middle' header will get summed multiple times, but -there doesn't seem to be a way to avoid that without incurring bigger costs -(e.g. in SKB bloat). - - -RCO: Remote Checksum Offload -============================ - -RCO is a technique for eliding the inner checksum of an encapsulated datagram, -allowing the outer checksum to be offloaded. It does, however, involve a -change to the encapsulation protocols, which the receiver must also support. -For this reason, it is disabled by default. - -RCO is detailed in the following Internet-Drafts: - -* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 -* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 - -In Linux, RCO is implemented individually in each encapsulation protocol, and -most tunnel types have flags controlling its use. For instance, VXLAN has the -flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be -used when transmitting to a given remote destination. diff --git a/Documentation/networking/segmentation-offloads.rst b/Documentation/networking/segmentation-offloads.rst new file mode 100644 index 000000000000..1794bfe98196 --- /dev/null +++ b/Documentation/networking/segmentation-offloads.rst @@ -0,0 +1,184 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================== +Segmentation Offloads in the Linux Networking Stack +=================================================== + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack +to take advantage of segmentation offload capabilities of various NICs. + +The following technologies are described: + * TCP Segmentation Offload - TSO + * UDP Fragmentation Offload - UFO + * IPIP, SIT, GRE, and UDP Tunnel Offloads + * Generic Segmentation Offload - GSO + * Generic Receive Offload - GRO + * Partial Generic Segmentation Offload - GSO_PARTIAL + * SCTP accelleration with GSO - GSO_BY_FRAGS + + +TCP Segmentation Offload +======================== + +TCP segmentation allows a device to segment a single frame into multiple +frames with a data payload size specified in skb_shinfo()->gso_size. +When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or +SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and +skb_shinfo()->gso_size should be set to a non-zero value. + +TCP segmentation is dependent on support for the use of partial checksum +offload. For this reason TSO is normally disabled if the Tx checksum +offload for a given device is disabled. + +In order to support TCP segmentation offload it is necessary to populate +the network and transport header offsets of the skbuff so that the device +drivers will be able determine the offsets of the IP or IPv6 header and the +TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should +also point to the TCP header of the packet. + +For IPv4 segmentation we support one of two types in terms of the IP ID. +The default behavior is to increment the IP ID with every segment. If the +GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP +ID and all segments will use the same IP ID. If a device has +NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO +and we will either increment the IP ID for all frames, or leave it at a +static value based on driver preference. + + +UDP Fragmentation Offload +========================= + +UDP fragmentation offload allows a device to fragment an oversized UDP +datagram into multiple IPv4 fragments. Many of the requirements for UDP +fragmentation offload are the same as TSO. However the IPv4 ID for +fragments should not increment as a single IPv4 datagram is fragmented. + +UFO is deprecated: modern kernels will no longer generate UFO skbs, but can +still receive them from tuntap and similar devices. Offload of UDP-based +tunnel protocols is still supported. + + +IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads +======================================================== + +In addition to the offloads described above it is possible for a frame to +contain additional headers such as an outer tunnel. In order to account +for such instances an additional set of segmentation offload types were +introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and +SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify +cases where there are more than just 1 set of headers. For example in the +case of IPIP and SIT we should have the network and transport headers moved +from the standard list of headers to "inner" header offsets. + +Currently only two levels of headers are supported. The convention is to +refer to the tunnel headers as the outer headers, while the encapsulated +data is normally referred to as the inner headers. Below is the list of +calls to access the given headers: + +IPIP/SIT Tunnel:: + + Outer Inner + MAC skb_mac_header + Network skb_network_header skb_inner_network_header + Transport skb_transport_header + +UDP/GRE Tunnel:: + + Outer Inner + MAC skb_mac_header skb_inner_mac_header + Network skb_network_header skb_inner_network_header + Transport skb_transport_header skb_inner_transport_header + +In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and +SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the +fact that the outer header also requests to have a non-zero checksum +included in the outer header. + +Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel +header has requested a remote checksum offload. In this case the inner +headers will be left with a partial checksum and only the outer header +checksum will be computed. + + +Generic Segmentation Offload +============================ + +Generic segmentation offload is a pure software offload that is meant to +deal with cases where device drivers cannot perform the offloads described +above. What occurs in GSO is that a given skbuff will have its data broken +out over multiple skbuffs that have been resized to match the MSS provided +via skb_shinfo()->gso_size. + +Before enabling any hardware segmentation offload a corresponding software +offload is required in GSO. Otherwise it becomes possible for a frame to +be re-routed between devices and end up being unable to be transmitted. + + +Generic Receive Offload +======================= + +Generic receive offload is the complement to GSO. Ideally any frame +assembled by GRO should be segmented to create an identical sequence of +frames using GSO, and any sequence of frames segmented by GSO should be +able to be reassembled back to the original by GRO. The only exception to +this is IPv4 ID in the case that the DF bit is set for a given IP header. +If the value of the IPv4 ID is not sequentially incrementing it will be +altered so that it is when a frame assembled via GRO is segmented via GSO. + + +Partial Generic Segmentation Offload +==================================== + +Partial generic segmentation offload is a hybrid between TSO and GSO. What +it effectively does is take advantage of certain traits of TCP and tunnels +so that instead of having to rewrite the packet headers for each segment +only the inner-most transport header and possibly the outer-most network +header need to be updated. This allows devices that do not support tunnel +offloads or tunnel offloads with checksum to still make use of segmentation. + +With the partial offload what occurs is that all headers excluding the +inner transport header are updated such that they will contain the correct +values for if the header was simply duplicated. The one exception to this +is the outer IPv4 ID field. It is up to the device drivers to guarantee +that the IPv4 ID field is incremented in the case that a given header does +not have the DF bit set. + + +SCTP accelleration with GSO +=========================== + +SCTP - despite the lack of hardware support - can still take advantage of +GSO to pass one large packet through the network stack, rather than +multiple small packets. + +This requires a different approach to other offloads, as SCTP packets +cannot be just segmented to (P)MTU. Rather, the chunks must be contained in +IP segments, padding respected. So unlike regular GSO, SCTP can't just +generate a big skb, set gso_size to the fragmentation point and deliver it +to IP layer. + +Instead, the SCTP protocol layer builds an skb with the segments correctly +padded and stored as chained skbs, and skb_segment() splits based on those. +To signal this, gso_size is set to the special value GSO_BY_FRAGS. + +Therefore, any code in the core networking stack must be aware of the +possibility that gso_size will be GSO_BY_FRAGS and handle that case +appropriately. + +There are some helpers to make this easier: + +- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if + an skb is an SCTP GSO skb. + +- For size checks, the skb_gso_validate_*_len family of helpers correctly + considers GSO_BY_FRAGS. + +- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size + will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. + +This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits +set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt deleted file mode 100644 index 1794bfe98196..000000000000 --- a/Documentation/networking/segmentation-offloads.txt +++ /dev/null @@ -1,184 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=================================================== -Segmentation Offloads in the Linux Networking Stack -=================================================== - - -Introduction -============ - -This document describes a set of techniques in the Linux networking stack -to take advantage of segmentation offload capabilities of various NICs. - -The following technologies are described: - * TCP Segmentation Offload - TSO - * UDP Fragmentation Offload - UFO - * IPIP, SIT, GRE, and UDP Tunnel Offloads - * Generic Segmentation Offload - GSO - * Generic Receive Offload - GRO - * Partial Generic Segmentation Offload - GSO_PARTIAL - * SCTP accelleration with GSO - GSO_BY_FRAGS - - -TCP Segmentation Offload -======================== - -TCP segmentation allows a device to segment a single frame into multiple -frames with a data payload size specified in skb_shinfo()->gso_size. -When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or -SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and -skb_shinfo()->gso_size should be set to a non-zero value. - -TCP segmentation is dependent on support for the use of partial checksum -offload. For this reason TSO is normally disabled if the Tx checksum -offload for a given device is disabled. - -In order to support TCP segmentation offload it is necessary to populate -the network and transport header offsets of the skbuff so that the device -drivers will be able determine the offsets of the IP or IPv6 header and the -TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should -also point to the TCP header of the packet. - -For IPv4 segmentation we support one of two types in terms of the IP ID. -The default behavior is to increment the IP ID with every segment. If the -GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP -ID and all segments will use the same IP ID. If a device has -NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO -and we will either increment the IP ID for all frames, or leave it at a -static value based on driver preference. - - -UDP Fragmentation Offload -========================= - -UDP fragmentation offload allows a device to fragment an oversized UDP -datagram into multiple IPv4 fragments. Many of the requirements for UDP -fragmentation offload are the same as TSO. However the IPv4 ID for -fragments should not increment as a single IPv4 datagram is fragmented. - -UFO is deprecated: modern kernels will no longer generate UFO skbs, but can -still receive them from tuntap and similar devices. Offload of UDP-based -tunnel protocols is still supported. - - -IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads -======================================================== - -In addition to the offloads described above it is possible for a frame to -contain additional headers such as an outer tunnel. In order to account -for such instances an additional set of segmentation offload types were -introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and -SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify -cases where there are more than just 1 set of headers. For example in the -case of IPIP and SIT we should have the network and transport headers moved -from the standard list of headers to "inner" header offsets. - -Currently only two levels of headers are supported. The convention is to -refer to the tunnel headers as the outer headers, while the encapsulated -data is normally referred to as the inner headers. Below is the list of -calls to access the given headers: - -IPIP/SIT Tunnel:: - - Outer Inner - MAC skb_mac_header - Network skb_network_header skb_inner_network_header - Transport skb_transport_header - -UDP/GRE Tunnel:: - - Outer Inner - MAC skb_mac_header skb_inner_mac_header - Network skb_network_header skb_inner_network_header - Transport skb_transport_header skb_inner_transport_header - -In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and -SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the -fact that the outer header also requests to have a non-zero checksum -included in the outer header. - -Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel -header has requested a remote checksum offload. In this case the inner -headers will be left with a partial checksum and only the outer header -checksum will be computed. - - -Generic Segmentation Offload -============================ - -Generic segmentation offload is a pure software offload that is meant to -deal with cases where device drivers cannot perform the offloads described -above. What occurs in GSO is that a given skbuff will have its data broken -out over multiple skbuffs that have been resized to match the MSS provided -via skb_shinfo()->gso_size. - -Before enabling any hardware segmentation offload a corresponding software -offload is required in GSO. Otherwise it becomes possible for a frame to -be re-routed between devices and end up being unable to be transmitted. - - -Generic Receive Offload -======================= - -Generic receive offload is the complement to GSO. Ideally any frame -assembled by GRO should be segmented to create an identical sequence of -frames using GSO, and any sequence of frames segmented by GSO should be -able to be reassembled back to the original by GRO. The only exception to -this is IPv4 ID in the case that the DF bit is set for a given IP header. -If the value of the IPv4 ID is not sequentially incrementing it will be -altered so that it is when a frame assembled via GRO is segmented via GSO. - - -Partial Generic Segmentation Offload -==================================== - -Partial generic segmentation offload is a hybrid between TSO and GSO. What -it effectively does is take advantage of certain traits of TCP and tunnels -so that instead of having to rewrite the packet headers for each segment -only the inner-most transport header and possibly the outer-most network -header need to be updated. This allows devices that do not support tunnel -offloads or tunnel offloads with checksum to still make use of segmentation. - -With the partial offload what occurs is that all headers excluding the -inner transport header are updated such that they will contain the correct -values for if the header was simply duplicated. The one exception to this -is the outer IPv4 ID field. It is up to the device drivers to guarantee -that the IPv4 ID field is incremented in the case that a given header does -not have the DF bit set. - - -SCTP accelleration with GSO -=========================== - -SCTP - despite the lack of hardware support - can still take advantage of -GSO to pass one large packet through the network stack, rather than -multiple small packets. - -This requires a different approach to other offloads, as SCTP packets -cannot be just segmented to (P)MTU. Rather, the chunks must be contained in -IP segments, padding respected. So unlike regular GSO, SCTP can't just -generate a big skb, set gso_size to the fragmentation point and deliver it -to IP layer. - -Instead, the SCTP protocol layer builds an skb with the segments correctly -padded and stored as chained skbs, and skb_segment() splits based on those. -To signal this, gso_size is set to the special value GSO_BY_FRAGS. - -Therefore, any code in the core networking stack must be aware of the -possibility that gso_size will be GSO_BY_FRAGS and handle that case -appropriately. - -There are some helpers to make this easier: - -- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if - an skb is an SCTP GSO skb. - -- For size checks, the skb_gso_validate_*_len family of helpers correctly - considers GSO_BY_FRAGS. - -- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size - will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs. - -This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits -set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..4e671b46e767 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4296,7 +4296,7 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb) /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. - * See Documentation/networking/checksum-offloads.txt for + * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. -- cgit v1.2.3 From 9ac6cb5fbb1781d120ca0ad29d014d35c9c3f0c4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 19 Dec 2018 17:48:17 +0100 Subject: i2c: add suspended flag and accessors for i2c adapters A few drivers open code the handling of suspended adapters. It could be handled by the core, though, to ensure generic handling. This patch adds the flag and accessor functions. The usage of these helpers is optional, though. See the kerneldoc in this patch. Using the new flag, we now reject further transfers if the adapter is already marked suspended. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- Documentation/i2c/fault-codes | 4 ++++ drivers/i2c/i2c-core-base.c | 3 +++ include/linux/i2c.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes index 47c25abb7d52..0cee0fc545b4 100644 --- a/Documentation/i2c/fault-codes +++ b/Documentation/i2c/fault-codes @@ -112,6 +112,10 @@ EPROTO case is when the length of an SMBus block data response (from the SMBus slave) is outside the range 1-32 bytes. +ESHUTDOWN + Returned when a transfer was requested using an adapter + which is already suspended. + ETIMEDOUT This is returned by drivers when an operation took too much time, and was aborted before it completed. diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 28460f6a60cc..926ca0a7477f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1232,6 +1232,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (!adap->lock_ops) adap->lock_ops = &i2c_adapter_lock_ops; + adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); mutex_init(&adap->userspace_clients_lock); @@ -1865,6 +1866,8 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (WARN_ON(!msgs || num < 1)) return -EINVAL; + if (WARN_ON(test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags))) + return -ESHUTDOWN; if (adap->quirks && i2c_check_for_quirks(adap, msgs, num)) return -EOPNOTSUPP; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 65b4eaed1d96..cba59d66c00d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -680,6 +680,8 @@ struct i2c_adapter { int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ + unsigned long locked_flags; /* owned by the I2C core */ +#define I2C_ALF_IS_SUSPENDED 0 int nr; char name[48]; @@ -762,6 +764,38 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) adapter->lock_ops->unlock_bus(adapter, flags); } +/** + * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core + * @adap: Adapter to mark as suspended + * + * When using this helper to mark an adapter as suspended, the core will reject + * further transfers to this adapter. The usage of this helper is optional but + * recommended for devices having distinct handlers for system suspend and + * runtime suspend. More complex devices are free to implement custom solutions + * to reject transfers when suspended. + */ +static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap) +{ + i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); + set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); + i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); +} + +/** + * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core + * @adap: Adapter to mark as resumed + * + * When using this helper to mark an adapter as resumed, the core will allow + * further transfers to this adapter. See also further notes to + * @i2c_mark_adapter_suspended(). + */ +static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) +{ + i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); + clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); + i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); +} + /*flags for the client struct: */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ -- cgit v1.2.3 From 47008e5161fa097ce9b848dee194b43262b743a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 16:13:25 -0700 Subject: LSM: Introduce LSM_FLAG_LEGACY_MAJOR This adds a flag for the current "major" LSMs to distinguish them when we have a universal method for ordering all LSMs. It's called "legacy" since the distinction of "major" will go away in the blob-sharing world. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 3 +++ security/apparmor/lsm.c | 1 + security/selinux/hooks.c | 1 + security/smack/smack_lsm.c | 1 + security/tomoyo/tomoyo.c | 1 + 5 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9a0bdf91e646..318d93f918c3 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2042,8 +2042,11 @@ extern char *lsm_names; extern void security_add_hooks(struct security_hook_list *hooks, int count, char *lsm); +#define LSM_FLAG_LEGACY_MAJOR BIT(0) + struct lsm_info { const char *name; /* Required. */ + unsigned long flags; /* Optional: flags describing LSM */ int (*init)(void); /* Required. */ }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 2c010874329f..e49c50e0d5ab 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1729,5 +1729,6 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = apparmor_init, }; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f0e36c3492ba..41908d2d6149 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6999,6 +6999,7 @@ void selinux_complete_init(void) all processes and objects when they are created. */ DEFINE_LSM(selinux) = { .name = "selinux", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = selinux_init, }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 430d4f35e55c..d72d215d7fde 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4812,5 +4812,6 @@ static __init int smack_init(void) */ DEFINE_LSM(smack) = { .name = "smack", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = smack_init, }; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 1b5b5097efd7..09f7af130d3a 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -552,5 +552,6 @@ static int __init tomoyo_init(void) DEFINE_LSM(tomoyo) = { .name = "tomoyo", + .flags = LSM_FLAG_LEGACY_MAJOR, .init = tomoyo_init, }; -- cgit v1.2.3 From c5459b829b716dafd226ad270f25c9a3050f7586 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Sep 2018 22:28:48 -0700 Subject: LSM: Plumb visibility into optional "enabled" state In preparation for lifting the "is this LSM enabled?" logic out of the individual LSMs, pass in any special enabled state tracking (as needed for SELinux, AppArmor, and LoadPin). This should be an "int" to include handling any future cases where "enabled" is exposed via sysctl which has no "bool" type. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 1 + security/apparmor/lsm.c | 5 +++-- security/selinux/hooks.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 318d93f918c3..7bbe5e287161 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2047,6 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, struct lsm_info { const char *name; /* Required. */ unsigned long flags; /* Optional: flags describing LSM */ + int *enabled; /* Optional: NULL means enabled. */ int (*init)(void); /* Required. */ }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e49c50e0d5ab..a4652ff622cf 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1333,8 +1333,8 @@ bool aa_g_paranoid_load = true; module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); /* Boot time disable flag */ -static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; -module_param_named(enabled, apparmor_enabled, bool, S_IRUGO); +static int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; +module_param_named(enabled, apparmor_enabled, int, 0444); static int __init apparmor_enabled_setup(char *str) { @@ -1730,5 +1730,6 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", .flags = LSM_FLAG_LEGACY_MAJOR, + .enabled = &apparmor_enabled, .init = apparmor_init, }; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 41908d2d6149..f847514d6f03 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -7000,6 +7000,7 @@ void selinux_complete_init(void) DEFINE_LSM(selinux) = { .name = "selinux", .flags = LSM_FLAG_LEGACY_MAJOR, + .enabled = &selinux_enabled, .init = selinux_init, }; -- cgit v1.2.3 From f4941d75b9cba5e1fae1aebe0139dcca0703a294 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Sep 2018 23:17:50 -0700 Subject: LSM: Lift LSM selection out of individual LSMs As a prerequisite to adjusting LSM selection logic in the future, this moves the selection logic up out of the individual major LSMs, making their init functions only run when actually enabled. This considers all LSMs enabled by default unless they specified an external "enable" variable. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler Reviewed-by: John Johansen --- include/linux/lsm_hooks.h | 1 - security/apparmor/lsm.c | 6 --- security/security.c | 102 +++++++++++++++++++++++++++++++-------------- security/selinux/hooks.c | 10 ----- security/smack/smack_lsm.c | 3 -- security/tomoyo/tomoyo.c | 2 - 6 files changed, 71 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7bbe5e287161..be1581d18e3e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2088,7 +2088,6 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ -extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); #ifdef CONFIG_SECURITY_YAMA extern void __init yama_add_hooks(void); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index a4652ff622cf..dfc5fbf8ba82 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1663,12 +1663,6 @@ static int __init apparmor_init(void) { int error; - if (!apparmor_enabled || !security_module_enable("apparmor")) { - aa_info_message("AppArmor disabled by boot time parameter"); - apparmor_enabled = false; - return 0; - } - aa_secids_init(); error = aa_setup_dfa_engine(); diff --git a/security/security.c b/security/security.c index 6bc591f77b1a..c900d7a1441a 100644 --- a/security/security.c +++ b/security/security.c @@ -52,33 +52,96 @@ static __initdata bool debug; pr_info(__VA_ARGS__); \ } while (0) +static bool __init is_enabled(struct lsm_info *lsm) +{ + if (!lsm->enabled || *lsm->enabled) + return true; + + return false; +} + +/* Mark an LSM's enabled flag. */ +static int lsm_enabled_true __initdata = 1; +static int lsm_enabled_false __initdata = 0; +static void __init set_enabled(struct lsm_info *lsm, bool enabled) +{ + /* + * When an LSM hasn't configured an enable variable, we can use + * a hard-coded location for storing the default enabled state. + */ + if (!lsm->enabled) { + if (enabled) + lsm->enabled = &lsm_enabled_true; + else + lsm->enabled = &lsm_enabled_false; + } else if (lsm->enabled == &lsm_enabled_true) { + if (!enabled) + lsm->enabled = &lsm_enabled_false; + } else if (lsm->enabled == &lsm_enabled_false) { + if (enabled) + lsm->enabled = &lsm_enabled_true; + } else { + *lsm->enabled = enabled; + } +} + +/* Is an LSM allowed to be initialized? */ +static bool __init lsm_allowed(struct lsm_info *lsm) +{ + /* Skip if the LSM is disabled. */ + if (!is_enabled(lsm)) + return false; + + /* Skip major-specific checks if not a major LSM. */ + if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) + return true; + + /* Disabled if this LSM isn't the chosen one. */ + if (strcmp(lsm->name, chosen_lsm) != 0) + return false; + + return true; +} + +/* Check if LSM should be initialized. */ +static void __init maybe_initialize_lsm(struct lsm_info *lsm) +{ + int enabled = lsm_allowed(lsm); + + /* Record enablement (to handle any following exclusive LSMs). */ + set_enabled(lsm, enabled); + + /* If selected, initialize the LSM. */ + if (enabled) { + int ret; + + init_debug("initializing %s\n", lsm->name); + ret = lsm->init(); + WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + } +} + static void __init ordered_lsm_init(void) { struct lsm_info *lsm; - int ret; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) != 0) continue; - init_debug("initializing %s\n", lsm->name); - ret = lsm->init(); - WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + maybe_initialize_lsm(lsm); } } static void __init major_lsm_init(void) { struct lsm_info *lsm; - int ret; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) continue; - init_debug("initializing %s\n", lsm->name); - ret = lsm->init(); - WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); + maybe_initialize_lsm(lsm); } } @@ -168,29 +231,6 @@ static int lsm_append(char *new, char **result) return 0; } -/** - * security_module_enable - Load given security module on boot ? - * @module: the name of the module - * - * Each LSM must pass this method before registering its own operations - * to avoid security registration races. This method may also be used - * to check if your LSM is currently loaded during kernel initialization. - * - * Returns: - * - * true if: - * - * - The passed LSM is the one chosen by user at boot time, - * - or the passed LSM is configured as the default and the user did not - * choose an alternate LSM at boot time. - * - * Otherwise, return false. - */ -int __init security_module_enable(const char *module) -{ - return !strcmp(module, chosen_lsm); -} - /** * security_add_hooks - Add a modules hooks to the hook lists. * @hooks: the hooks to add diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f847514d6f03..0f8ae2fbd14a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6928,16 +6928,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { static __init int selinux_init(void) { - if (!security_module_enable("selinux")) { - selinux_enabled = 0; - return 0; - } - - if (!selinux_enabled) { - pr_info("SELinux: Disabled at boot.\n"); - return 0; - } - pr_info("SELinux: Initializing.\n"); memset(&selinux_state, 0, sizeof(selinux_state)); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d72d215d7fde..580e9d6e5680 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4762,9 +4762,6 @@ static __init int smack_init(void) struct cred *cred; struct task_smack *tsp; - if (!security_module_enable("smack")) - return 0; - smack_inode_cache = KMEM_CACHE(inode_smack, 0); if (!smack_inode_cache) return -ENOMEM; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 09f7af130d3a..a46f6bc1e97c 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -540,8 +540,6 @@ static int __init tomoyo_init(void) { struct cred *cred = (struct cred *) current_cred(); - if (!security_module_enable("tomoyo")) - return 0; /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); -- cgit v1.2.3 From a8027fb0d188599ccdb2096f49f708bae04d86c4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 9 Oct 2018 14:42:57 -0700 Subject: LSM: Tie enabling logic to presence in ordered list Until now, any LSM without an enable storage variable was considered enabled. This inverts the logic and sets defaults to true only if the LSM gets added to the ordered initialization list. (And an exception continues for the major LSMs until they are integrated into the ordered initialization in a later patch.) Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 +- security/security.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index be1581d18e3e..e28a3aa639e8 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2047,7 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, struct lsm_info { const char *name; /* Required. */ unsigned long flags; /* Optional: flags describing LSM */ - int *enabled; /* Optional: NULL means enabled. */ + int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ }; diff --git a/security/security.c b/security/security.c index 2e1f48e8a6f2..b6d3456978a4 100644 --- a/security/security.c +++ b/security/security.c @@ -63,10 +63,10 @@ static __initdata bool debug; static bool __init is_enabled(struct lsm_info *lsm) { - if (!lsm->enabled || *lsm->enabled) - return true; + if (!lsm->enabled) + return false; - return false; + return *lsm->enabled; } /* Mark an LSM's enabled flag. */ @@ -117,7 +117,11 @@ static void __init append_ordered_lsm(struct lsm_info *lsm, const char *from) if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from)) return; + /* Enable this LSM, if it is not already set. */ + if (!lsm->enabled) + lsm->enabled = &lsm_enabled_true; ordered_lsms[last_lsm++] = lsm; + init_debug("%s ordering: %s (%sabled)\n", from, lsm->name, is_enabled(lsm) ? "en" : "dis"); } @@ -210,6 +214,10 @@ static void __init major_lsm_init(void) if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0) continue; + /* Enable this LSM, if it is not already set. */ + if (!lsm->enabled) + lsm->enabled = &lsm_enabled_true; + maybe_initialize_lsm(lsm); } } -- cgit v1.2.3 From 14bd99c821f7ace0e8110a1bfdfaa27e1788e20f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 19:57:06 -0700 Subject: LSM: Separate idea of "major" LSM from "exclusive" LSM In order to both support old "security=" Legacy Major LSM selection, and handling real exclusivity, this creates LSM_FLAG_EXCLUSIVE and updates the selection logic to handle them. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 1 + security/apparmor/lsm.c | 2 +- security/security.c | 12 ++++++++++++ security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e28a3aa639e8..c3843b33da9e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2043,6 +2043,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, char *lsm); #define LSM_FLAG_LEGACY_MAJOR BIT(0) +#define LSM_FLAG_EXCLUSIVE BIT(1) struct lsm_info { const char *name; /* Required. */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index dfc5fbf8ba82..149a3e16b5da 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1723,7 +1723,7 @@ alloc_out: DEFINE_LSM(apparmor) = { .name = "apparmor", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, .init = apparmor_init, }; diff --git a/security/security.c b/security/security.c index 88de6b073246..a8dd7defe30a 100644 --- a/security/security.c +++ b/security/security.c @@ -49,6 +49,7 @@ static __initconst const char * const builtin_lsm_order = CONFIG_LSM; /* Ordered list of LSMs to initialize. */ static __initdata struct lsm_info **ordered_lsms; +static __initdata struct lsm_info *exclusive; static __initdata bool debug; #define init_debug(...) \ @@ -129,6 +130,12 @@ static bool __init lsm_allowed(struct lsm_info *lsm) if (!is_enabled(lsm)) return false; + /* Not allowed if another exclusive LSM already initialized. */ + if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && exclusive) { + init_debug("exclusive disabled: %s\n", lsm->name); + return false; + } + return true; } @@ -144,6 +151,11 @@ static void __init maybe_initialize_lsm(struct lsm_info *lsm) if (enabled) { int ret; + if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && !exclusive) { + exclusive = lsm; + init_debug("exclusive chosen: %s\n", lsm->name); + } + init_debug("initializing %s\n", lsm->name); ret = lsm->init(); WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 0f8ae2fbd14a..49865f119b16 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6989,7 +6989,7 @@ void selinux_complete_init(void) all processes and objects when they are created. */ DEFINE_LSM(selinux) = { .name = "selinux", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &selinux_enabled, .init = selinux_init, }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 580e9d6e5680..780733341d02 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4809,6 +4809,6 @@ static __init int smack_init(void) */ DEFINE_LSM(smack) = { .name = "smack", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .init = smack_init, }; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index a46f6bc1e97c..daff7d7897ad 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -550,6 +550,6 @@ static int __init tomoyo_init(void) DEFINE_LSM(tomoyo) = { .name = "tomoyo", - .flags = LSM_FLAG_LEGACY_MAJOR, + .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .init = tomoyo_init, }; -- cgit v1.2.3 From 70b62c25665f636c9f6c700b26af7df296b0887e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:26:37 -0700 Subject: LoadPin: Initialize as ordered LSM This converts LoadPin from being a direct "minor" LSM into an ordered LSM. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 5 ----- security/Kconfig | 39 +-------------------------------------- security/loadpin/loadpin.c | 8 +++++++- security/security.c | 1 - 4 files changed, 8 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c3843b33da9e..fb1a653ccfcb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2095,10 +2095,5 @@ extern void __init yama_add_hooks(void); #else static inline void __init yama_add_hooks(void) { } #endif -#ifdef CONFIG_SECURITY_LOADPIN -void __init loadpin_add_hooks(void); -#else -static inline void loadpin_add_hooks(void) { }; -#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index cedf69e8a22c..2cd737ba7660 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -239,46 +239,9 @@ source "security/yama/Kconfig" source "security/integrity/Kconfig" -choice - prompt "Default security module" - default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX - default DEFAULT_SECURITY_SMACK if SECURITY_SMACK - default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO - default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR - default DEFAULT_SECURITY_DAC - - help - Select the security module that will be used by default if the - kernel parameter security= is not specified. - - config DEFAULT_SECURITY_SELINUX - bool "SELinux" if SECURITY_SELINUX=y - - config DEFAULT_SECURITY_SMACK - bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y - - config DEFAULT_SECURITY_TOMOYO - bool "TOMOYO" if SECURITY_TOMOYO=y - - config DEFAULT_SECURITY_APPARMOR - bool "AppArmor" if SECURITY_APPARMOR=y - - config DEFAULT_SECURITY_DAC - bool "Unix Discretionary Access Controls" - -endchoice - -config DEFAULT_SECURITY - string - default "selinux" if DEFAULT_SECURITY_SELINUX - default "smack" if DEFAULT_SECURITY_SMACK - default "tomoyo" if DEFAULT_SECURITY_TOMOYO - default "apparmor" if DEFAULT_SECURITY_APPARMOR - default "" if DEFAULT_SECURITY_DAC - config LSM string "Ordered list of enabled LSMs" - default "integrity" + default "loadpin,integrity,selinux,smack,tomoyo,apparmor" help A comma-separated list of LSMs, in initialization order. Any LSMs left off this list will be ignored. This can be diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 48f39631b370..055fb0a64169 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -187,13 +187,19 @@ static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(kernel_load_data, loadpin_load_data), }; -void __init loadpin_add_hooks(void) +static int __init loadpin_init(void) { pr_info("ready to pin (currently %senforcing)\n", enforce ? "" : "not "); security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin"); + return 0; } +DEFINE_LSM(loadpin) = { + .name = "loadpin", + .init = loadpin_init, +}; + /* Should not be mutable after boot, so not listed in sysfs (perm == 0). */ module_param(enforce, int, 0); MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning"); diff --git a/security/security.c b/security/security.c index 46c5b0fa515e..b8d75f5a948d 100644 --- a/security/security.c +++ b/security/security.c @@ -275,7 +275,6 @@ int __init security_init(void) */ capability_add_hooks(); yama_add_hooks(); - loadpin_add_hooks(); /* Load LSMs in specified order. */ ordered_lsm_init(); -- cgit v1.2.3 From d6aed64b74b73b64278c059eacd59d87167aa968 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:37:20 -0700 Subject: Yama: Initialize as ordered LSM This converts Yama from being a direct "minor" LSM into an ordered LSM. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 5 ----- security/Kconfig | 2 +- security/security.c | 1 - security/yama/yama_lsm.c | 8 +++++++- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index fb1a653ccfcb..2849e9b2c01d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2090,10 +2090,5 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ extern void __init capability_add_hooks(void); -#ifdef CONFIG_SECURITY_YAMA -extern void __init yama_add_hooks(void); -#else -static inline void __init yama_add_hooks(void) { } -#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index 2cd737ba7660..78dc12b7eeb3 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -241,7 +241,7 @@ source "security/integrity/Kconfig" config LSM string "Ordered list of enabled LSMs" - default "loadpin,integrity,selinux,smack,tomoyo,apparmor" + default "yama,loadpin,integrity,selinux,smack,tomoyo,apparmor" help A comma-separated list of LSMs, in initialization order. Any LSMs left off this list will be ignored. This can be diff --git a/security/security.c b/security/security.c index b8d75f5a948d..35f93b7c585b 100644 --- a/security/security.c +++ b/security/security.c @@ -274,7 +274,6 @@ int __init security_init(void) * Load minor LSMs, with the capability module always first. */ capability_add_hooks(); - yama_add_hooks(); /* Load LSMs in specified order. */ ordered_lsm_init(); diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index ffda91a4a1aa..eb1da1303d2e 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -477,9 +477,15 @@ static void __init yama_init_sysctl(void) static inline void yama_init_sysctl(void) { } #endif /* CONFIG_SYSCTL */ -void __init yama_add_hooks(void) +static int __init yama_init(void) { pr_info("Yama: becoming mindful.\n"); security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), "yama"); yama_init_sysctl(); + return 0; } + +DEFINE_LSM(yama) = { + .name = "yama", + .init = yama_init, +}; -- cgit v1.2.3 From e2bc445b66cad25b0627391df8138a83d0e48f97 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Sep 2018 17:48:21 -0700 Subject: LSM: Introduce enum lsm_order In preparation for distinguishing the "capability" LSM from other LSMs, it must be ordered first. This introduces LSM_ORDER_MUTABLE for the general LSMs and LSM_ORDER_FIRST for capability. In the future LSM_ORDER_LAST for could be added for anything that must run last (e.g. Landlock may use this). Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 6 ++++++ security/security.c | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 2849e9b2c01d..27d4db9588bb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2045,8 +2045,14 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count, #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) +enum lsm_order { + LSM_ORDER_FIRST = -1, /* This is only for capabilities. */ + LSM_ORDER_MUTABLE = 0, +}; + struct lsm_info { const char *name; /* Required. */ + enum lsm_order order; /* Optional: default is LSM_ORDER_MUTABLE */ unsigned long flags; /* Optional: flags describing LSM */ int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ diff --git a/security/security.c b/security/security.c index 35f93b7c585b..8b673bb2a0dd 100644 --- a/security/security.c +++ b/security/security.c @@ -174,6 +174,12 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) struct lsm_info *lsm; char *sep, *name, *next; + /* LSM_ORDER_FIRST is always first. */ + for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { + if (lsm->order == LSM_ORDER_FIRST) + append_ordered_lsm(lsm, "first"); + } + /* Process "security=", if given. */ if (chosen_major_lsm) { struct lsm_info *major; @@ -202,7 +208,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) bool found = false; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { - if (strcmp(lsm->name, name) == 0) { + if (lsm->order == LSM_ORDER_MUTABLE && + strcmp(lsm->name, name) == 0) { append_ordered_lsm(lsm, origin); found = true; } -- cgit v1.2.3 From d117a154e6128abac5409d3f173584e7b25981a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Sep 2018 15:40:45 -0700 Subject: capability: Initialize as LSM_ORDER_FIRST This converts capabilities to use the new LSM_ORDER_FIRST position. Signed-off-by: Kees Cook Reviewed-by: Casey Schaufler --- include/linux/lsm_hooks.h | 2 -- security/commoncap.c | 9 ++++++++- security/security.c | 5 ----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 27d4db9588bb..0c908c091a03 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2095,6 +2095,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ -extern void __init capability_add_hooks(void); - #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/commoncap.c b/security/commoncap.c index 232db019f051..52e04136bfa8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1362,10 +1362,17 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory), }; -void __init capability_add_hooks(void) +static int __init capability_init(void) { security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks), "capability"); + return 0; } +DEFINE_LSM(capability) = { + .name = "capability", + .order = LSM_ORDER_FIRST, + .init = capability_init, +}; + #endif /* CONFIG_SECURITY */ diff --git a/security/security.c b/security/security.c index 8b673bb2a0dd..9411f659454b 100644 --- a/security/security.c +++ b/security/security.c @@ -277,11 +277,6 @@ int __init security_init(void) i++) INIT_HLIST_HEAD(&list[i]); - /* - * Load minor LSMs, with the capability module always first. - */ - capability_add_hooks(); - /* Load LSMs in specified order. */ ordered_lsm_init(); -- cgit v1.2.3 From 6d9c939dbe4d0bcea09cd4b410f624cde1acb678 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:16:59 -0700 Subject: procfs: add smack subdir to attrs Back in 2007 I made what turned out to be a rather serious mistake in the implementation of the Smack security module. The SELinux module used an interface in /proc to manipulate the security context on processes. Rather than use a similar interface, I used the same interface. The AppArmor team did likewise. Now /proc/.../attr/current will tell you the security "context" of the process, but it will be different depending on the security module you're using. This patch provides a subdirectory in /proc/.../attr for Smack. Smack user space can use the "current" file in this subdirectory and never have to worry about getting SELinux attributes by mistake. Programs that use the old interface will continue to work (or fail, as the case may be) as before. The proposed S.A.R.A security module is dependent on the mechanism to create its own attr subdirectory. The original implementation is by Kees Cook. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Signed-off-by: Kees Cook --- Documentation/admin-guide/LSM/index.rst | 13 +++++-- fs/proc/base.c | 64 ++++++++++++++++++++++++++++----- fs/proc/internal.h | 1 + include/linux/security.h | 15 +++++--- security/security.c | 24 ++++++++++--- 5 files changed, 96 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst index c980dfe9abf1..9842e21afd4a 100644 --- a/Documentation/admin-guide/LSM/index.rst +++ b/Documentation/admin-guide/LSM/index.rst @@ -17,9 +17,8 @@ MAC extensions, other extensions can be built using the LSM to provide specific changes to system operation when these tweaks are not available in the core functionality of Linux itself. -Without a specific LSM built into the kernel, the default LSM will be the -Linux capabilities system. Most LSMs choose to extend the capabilities -system, building their checks on top of the defined capability hooks. +The Linux capabilities modules will always be included. This may be +followed by any number of "minor" modules and at most one "major" module. For more details on capabilities, see ``capabilities(7)`` in the Linux man-pages project. @@ -30,6 +29,14 @@ order in which checks are made. The capability module will always be first, followed by any "minor" modules (e.g. Yama) and then the one "major" module (e.g. SELinux) if there is one configured. +Process attributes associated with "major" security modules should +be accessed and maintained using the special files in ``/proc/.../attr``. +A security module may maintain a module specific subdirectory there, +named after the module. ``/proc/.../attr/smack`` is provided by the Smack +security module and contains all its special files. The files directly +in ``/proc/.../attr`` remain as legacy interfaces for modules that provide +subdirectories. + .. toctree:: :maxdepth: 1 diff --git a/fs/proc/base.c b/fs/proc/base.c index 633a63462573..c9d775fd24ef 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -140,9 +140,13 @@ struct pid_entry { #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) #define ONE(NAME, MODE, show) \ - NOD(NAME, (S_IFREG|(MODE)), \ + NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) +#define ATTR(LSM, NAME, MODE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_pid_attr_operations, \ + { .lsm = LSM }) /* * Count the number of hardlinks for the pid_entry table, excluding the . @@ -2525,7 +2529,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, if (!task) return -ESRCH; - length = security_getprocattr(task, + length = security_getprocattr(task, PROC_I(inode)->op.lsm, (char*)file->f_path.dentry->d_name.name, &p); put_task_struct(task); @@ -2574,7 +2578,9 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (rv < 0) goto out_free; - rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count); + rv = security_setprocattr(PROC_I(inode)->op.lsm, + file->f_path.dentry->d_name.name, page, + count); mutex_unlock(¤t->signal->cred_guard_mutex); out_free: kfree(page); @@ -2588,13 +2594,53 @@ static const struct file_operations proc_pid_attr_operations = { .llseek = generic_file_llseek, }; +#define LSM_DIR_OPS(LSM) \ +static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ + struct dir_context *ctx) \ +{ \ + return proc_pident_readdir(filp, ctx, \ + LSM##_attr_dir_stuff, \ + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ +} \ +\ +static const struct file_operations proc_##LSM##_attr_dir_ops = { \ + .read = generic_read_dir, \ + .iterate = proc_##LSM##_attr_dir_iterate, \ + .llseek = default_llseek, \ +}; \ +\ +static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ + struct dentry *dentry, unsigned int flags) \ +{ \ + return proc_pident_lookup(dir, dentry, \ + LSM##_attr_dir_stuff, \ + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ +} \ +\ +static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ + .lookup = proc_##LSM##_attr_dir_lookup, \ + .getattr = pid_getattr, \ + .setattr = proc_setattr, \ +} + +#ifdef CONFIG_SECURITY_SMACK +static const struct pid_entry smack_attr_dir_stuff[] = { + ATTR("smack", "current", 0666), +}; +LSM_DIR_OPS(smack); +#endif + static const struct pid_entry attr_dir_stuff[] = { - REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("prev", S_IRUGO, proc_pid_attr_operations), - REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), - REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + ATTR(NULL, "current", 0666), + ATTR(NULL, "prev", 0444), + ATTR(NULL, "exec", 0666), + ATTR(NULL, "fscreate", 0666), + ATTR(NULL, "keycreate", 0666), + ATTR(NULL, "sockcreate", 0666), +#ifdef CONFIG_SECURITY_SMACK + DIR("smack", 0555, + proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), +#endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..d4f9989063d0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -81,6 +81,7 @@ union proc_op { int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); + const char *lsm; }; struct proc_inode { diff --git a/include/linux/security.h b/include/linux/security.h index dbfb5a66babb..b2c5333ed4b5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -366,8 +366,10 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, char *name, char **value); -int security_setprocattr(const char *name, void *value, size_t size); +int security_getprocattr(struct task_struct *p, const char *lsm, char *name, + char **value); +int security_setprocattr(const char *lsm, const char *name, void *value, + size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -1112,15 +1114,18 @@ static inline int security_sem_semop(struct kern_ipc_perm *sma, return 0; } -static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void security_d_instantiate(struct dentry *dentry, + struct inode *inode) { } -static inline int security_getprocattr(struct task_struct *p, char *name, char **value) +static inline int security_getprocattr(struct task_struct *p, const char *lsm, + char *name, char **value) { return -EINVAL; } -static inline int security_setprocattr(char *name, void *value, size_t size) +static inline int security_setprocattr(const char *lsm, char *name, + void *value, size_t size) { return -EINVAL; } diff --git a/security/security.c b/security/security.c index 9411f659454b..60b39db95c2f 100644 --- a/security/security.c +++ b/security/security.c @@ -1485,14 +1485,30 @@ void security_d_instantiate(struct dentry *dentry, struct inode *inode) } EXPORT_SYMBOL(security_d_instantiate); -int security_getprocattr(struct task_struct *p, char *name, char **value) +int security_getprocattr(struct task_struct *p, const char *lsm, char *name, + char **value) { - return call_int_hook(getprocattr, -EINVAL, p, name, value); + struct security_hook_list *hp; + + hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { + if (lsm != NULL && strcmp(lsm, hp->lsm)) + continue; + return hp->hook.getprocattr(p, name, value); + } + return -EINVAL; } -int security_setprocattr(const char *name, void *value, size_t size) +int security_setprocattr(const char *lsm, const char *name, void *value, + size_t size) { - return call_int_hook(setprocattr, -EINVAL, name, value, size); + struct security_hook_list *hp; + + hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { + if (lsm != NULL && strcmp(lsm, hp->lsm)) + continue; + return hp->hook.setprocattr(name, value, size); + } + return -EINVAL; } int security_netlink_send(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 3d252529480c68bfd6a6774652df7c8968b28e41 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:17:34 -0700 Subject: SELinux: Remove unused selinux_is_enabled There are no longer users of selinux_is_enabled(). Remove it. As selinux_is_enabled() is the only reason for include/linux/selinux.h remove that as well. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Signed-off-by: Kees Cook --- include/linux/cred.h | 1 - include/linux/selinux.h | 35 ----------------------------------- security/selinux/Makefile | 2 +- security/selinux/exports.c | 23 ----------------------- security/selinux/hooks.c | 1 - security/selinux/include/audit.h | 3 --- security/selinux/ss/services.c | 1 - 7 files changed, 1 insertion(+), 65 deletions(-) delete mode 100644 include/linux/selinux.h delete mode 100644 security/selinux/exports.c (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4907c9df86b3..ddd45bb74887 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/selinux.h b/include/linux/selinux.h deleted file mode 100644 index 44f459612690..000000000000 --- a/include/linux/selinux.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SELinux services exported to the rest of the kernel. - * - * Author: James Morris - * - * Copyright (C) 2005 Red Hat, Inc., James Morris - * Copyright (C) 2006 Trusted Computer Solutions, Inc. - * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, - * as published by the Free Software Foundation. - */ -#ifndef _LINUX_SELINUX_H -#define _LINUX_SELINUX_H - -struct selinux_audit_rule; -struct audit_context; -struct kern_ipc_perm; - -#ifdef CONFIG_SECURITY_SELINUX - -/** - * selinux_is_enabled - is SELinux enabled? - */ -bool selinux_is_enabled(void); -#else - -static inline bool selinux_is_enabled(void) -{ - return false; -} -#endif /* CONFIG_SECURITY_SELINUX */ - -#endif /* _LINUX_SELINUX_H */ diff --git a/security/selinux/Makefile b/security/selinux/Makefile index c7161f8792b2..ccf950409384 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SECURITY_SELINUX) := selinux.o selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \ - netnode.o netport.o ibpkey.o exports.o \ + netnode.o netport.o ibpkey.o \ ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \ ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/status.o diff --git a/security/selinux/exports.c b/security/selinux/exports.c deleted file mode 100644 index e75dd94e2d2b..000000000000 --- a/security/selinux/exports.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * SELinux services exported to the rest of the kernel. - * - * Author: James Morris - * - * Copyright (C) 2005 Red Hat, Inc., James Morris - * Copyright (C) 2006 Trusted Computer Solutions, Inc. - * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, - * as published by the Free Software Foundation. - */ -#include -#include - -#include "security.h" - -bool selinux_is_enabled(void) -{ - return selinux_enabled; -} -EXPORT_SYMBOL_GPL(selinux_is_enabled); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ad227177550b..169cf5b3334b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -79,7 +79,6 @@ #include #include #include -#include #include #include #include diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 1bdf973433cc..36e1d44c0209 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -1,9 +1,6 @@ /* * SELinux support for the Audit LSM hooks * - * Most of below header was moved from include/linux/selinux.h which - * is released under below copyrights: - * * Author: James Morris * * Copyright (C) 2005 Red Hat, Inc., James Morris diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index dd44126c8d14..d6e7b4856d93 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From bbd3662a834813730912a58efb44dd6df6d952e6 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 12 Nov 2018 09:30:56 -0800 Subject: Infrastructure management of the cred security blob Move management of the cred security blob out of the security modules and into the security infrastructre. Instead of allocating and freeing space the security modules tell the infrastructure how much space they require. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 12 ++++++ security/apparmor/include/cred.h | 4 +- security/apparmor/include/lib.h | 4 ++ security/apparmor/lsm.c | 9 ++++ security/security.c | 89 ++++++++++++++++++++++++++++++++++++++- security/selinux/hooks.c | 51 +++++----------------- security/selinux/include/objsec.h | 4 +- security/smack/smack.h | 3 +- security/smack/smack_lsm.c | 79 +++++++++++----------------------- security/tomoyo/common.h | 3 +- security/tomoyo/tomoyo.c | 6 +++ 11 files changed, 162 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0c908c091a03..dd33666567bc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2027,6 +2027,13 @@ struct security_hook_list { char *lsm; } __randomize_layout; +/* + * Security blob size or offset data. + */ +struct lsm_blob_sizes { + int lbs_cred; +}; + /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes @@ -2056,6 +2063,7 @@ struct lsm_info { unsigned long flags; /* Optional: flags describing LSM */ int *enabled; /* Optional: controlled by CONFIG_LSM */ int (*init)(void); /* Required. */ + struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; @@ -2095,4 +2103,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ +#ifdef CONFIG_SECURITY +void __init lsm_early_cred(struct cred *cred); +#endif + #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h index a757370f2a0c..b9504a05fddc 100644 --- a/security/apparmor/include/cred.h +++ b/security/apparmor/include/cred.h @@ -25,7 +25,7 @@ static inline struct aa_label *cred_label(const struct cred *cred) { - struct aa_label **blob = cred->security; + struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); return *blob; @@ -34,7 +34,7 @@ static inline struct aa_label *cred_label(const struct cred *cred) static inline void set_cred_label(const struct cred *cred, struct aa_label *label) { - struct aa_label **blob = cred->security; + struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); *blob = label; diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 6505e1ad9e23..bbe9b384d71d 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -16,6 +16,7 @@ #include #include +#include #include "match.h" @@ -55,6 +56,9 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len); void aa_info_message(const char *str); +/* Security blob offsets */ +extern struct lsm_blob_sizes apparmor_blob_sizes; + /** * aa_strneq - compare null terminated @str to a non null terminated substring * @str: a null terminated string diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8c2cb4b1a6c3..d5e4a384f205 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1151,6 +1151,13 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, } #endif +/* + * The cred blob is a pointer to, not an instance of, an aa_task_ctx. + */ +struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct aa_task_ctx *), +}; + static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -1485,6 +1492,7 @@ static int __init set_init_ctx(void) if (!ctx) return -ENOMEM; + lsm_early_cred(cred); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); task_ctx(current) = ctx; @@ -1725,5 +1733,6 @@ DEFINE_LSM(apparmor) = { .name = "apparmor", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, + .blobs = &apparmor_blob_sizes, .init = apparmor_init, }; diff --git a/security/security.c b/security/security.c index 60b39db95c2f..09be8ce007a2 100644 --- a/security/security.c +++ b/security/security.c @@ -41,6 +41,8 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); char *lsm_names; +static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; + /* Boot-time LSM user choice */ static __initdata const char *chosen_lsm_order; static __initdata const char *chosen_major_lsm; @@ -139,6 +141,25 @@ static bool __init lsm_allowed(struct lsm_info *lsm) return true; } +static void __init lsm_set_blob_size(int *need, int *lbs) +{ + int offset; + + if (*need > 0) { + offset = *lbs; + *lbs += *need; + *need = offset; + } +} + +static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) +{ + if (!needed) + return; + + lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); +} + /* Prepare LSM for initialization. */ static void __init prepare_lsm(struct lsm_info *lsm) { @@ -153,6 +174,8 @@ static void __init prepare_lsm(struct lsm_info *lsm) exclusive = lsm; init_debug("exclusive chosen: %s\n", lsm->name); } + + lsm_set_blob_sizes(lsm->blobs); } } @@ -255,6 +278,8 @@ static void __init ordered_lsm_init(void) for (lsm = ordered_lsms; *lsm; lsm++) prepare_lsm(*lsm); + init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); + for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -382,6 +407,47 @@ int unregister_lsm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_lsm_notifier); +/** + * lsm_cred_alloc - allocate a composite cred blob + * @cred: the cred that needs a blob + * @gfp: allocation type + * + * Allocate the cred blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) +{ + if (blob_sizes.lbs_cred == 0) { + cred->security = NULL; + return 0; + } + + cred->security = kzalloc(blob_sizes.lbs_cred, gfp); + if (cred->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_early_cred - during initialization allocate a composite cred blob + * @cred: the cred that needs a blob + * + * Allocate the cred blob for all the modules if it's not already there + */ +void __init lsm_early_cred(struct cred *cred) +{ + int rc; + + if (cred == NULL) + panic("%s: NULL cred.\n", __func__); + if (cred->security != NULL) + return; + rc = lsm_cred_alloc(cred, GFP_KERNEL); + if (rc) + panic("%s: Early cred alloc failed.\n", __func__); +} + /* * Hook list operation macros. * @@ -1195,17 +1261,36 @@ void security_task_free(struct task_struct *task) int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) { - return call_int_hook(cred_alloc_blank, 0, cred, gfp); + int rc = lsm_cred_alloc(cred, gfp); + + if (rc) + return rc; + + rc = call_int_hook(cred_alloc_blank, 0, cred, gfp); + if (rc) + security_cred_free(cred); + return rc; } void security_cred_free(struct cred *cred) { call_void_hook(cred_free, cred); + + kfree(cred->security); + cred->security = NULL; } int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - return call_int_hook(cred_prepare, 0, new, old, gfp); + int rc = lsm_cred_alloc(new, gfp); + + if (rc) + return rc; + + rc = call_int_hook(cred_prepare, 0, new, old, gfp); + if (rc) + security_cred_free(new); + return rc; } void security_transfer_creds(struct cred *new, const struct cred *old) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 169cf5b3334b..239b13b442e7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -210,12 +210,9 @@ static void cred_init_security(void) struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; - tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); - if (!tsec) - panic("SELinux: Failed to initialize initial task.\n"); - + lsm_early_cred(cred); + tsec = selinux_cred(cred); tsec->osid = tsec->sid = SECINITSID_KERNEL; - cred->security = tsec; } /* @@ -3685,47 +3682,16 @@ static int selinux_task_alloc(struct task_struct *task, sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); } -/* - * allocate the SELinux part of blank credentials - */ -static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp) -{ - struct task_security_struct *tsec; - - tsec = kzalloc(sizeof(struct task_security_struct), gfp); - if (!tsec) - return -ENOMEM; - - cred->security = tsec; - return 0; -} - -/* - * detach and free the LSM part of a set of credentials - */ -static void selinux_cred_free(struct cred *cred) -{ - struct task_security_struct *tsec = selinux_cred(cred); - - kfree(tsec); -} - /* * prepare a new set of credentials for modification */ static int selinux_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { - const struct task_security_struct *old_tsec; - struct task_security_struct *tsec; - - old_tsec = selinux_cred(old); - - tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); - if (!tsec) - return -ENOMEM; + const struct task_security_struct *old_tsec = selinux_cred(old); + struct task_security_struct *tsec = selinux_cred(new); - new->security = tsec; + *tsec = *old_tsec; return 0; } @@ -6678,6 +6644,10 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif +struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct task_security_struct), +}; + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6761,8 +6731,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_open, selinux_file_open), LSM_HOOK_INIT(task_alloc, selinux_task_alloc), - LSM_HOOK_INIT(cred_alloc_blank, selinux_cred_alloc_blank), - LSM_HOOK_INIT(cred_free, selinux_cred_free), LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare), LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer), LSM_HOOK_INIT(cred_getsecid, selinux_cred_getsecid), @@ -6981,6 +6949,7 @@ DEFINE_LSM(selinux) = { .name = "selinux", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &selinux_enabled, + .blobs = &selinux_blob_sizes, .init = selinux_init, }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 734b6833bdff..c2974b031d05 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "flask.h" #include "avc.h" @@ -158,9 +159,10 @@ struct bpf_security_struct { u32 sid; /*SID of bpf obj creater*/ }; +extern struct lsm_blob_sizes selinux_blob_sizes; static inline struct task_security_struct *selinux_cred(const struct cred *cred) { - return cred->security; + return cred->security + selinux_blob_sizes.lbs_cred; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 01a922856eba..b27eb252e953 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -336,6 +336,7 @@ extern struct smack_known *smack_syslog_label; extern struct smack_known *smack_unconfined; #endif extern int smack_ptrace_rule; +extern struct lsm_blob_sizes smack_blob_sizes; extern struct smack_known smack_known_floor; extern struct smack_known smack_known_hat; @@ -358,7 +359,7 @@ extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS]; static inline struct task_smack *smack_cred(const struct cred *cred) { - return cred->security; + return cred->security + smack_blob_sizes.lbs_cred; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 9a050ca17296..bad27a8e1631 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -326,29 +326,20 @@ static struct inode_smack *new_inode_smack(struct smack_known *skp) } /** - * new_task_smack - allocate a task security blob + * init_task_smack - initialize a task security blob + * @tsp: blob to initialize * @task: a pointer to the Smack label for the running task * @forked: a pointer to the Smack label for the forked task - * @gfp: type of the memory for the allocation * - * Returns the new blob or NULL if there's no memory available */ -static struct task_smack *new_task_smack(struct smack_known *task, - struct smack_known *forked, gfp_t gfp) +static void init_task_smack(struct task_smack *tsp, struct smack_known *task, + struct smack_known *forked) { - struct task_smack *tsp; - - tsp = kzalloc(sizeof(struct task_smack), gfp); - if (tsp == NULL) - return NULL; - tsp->smk_task = task; tsp->smk_forked = forked; INIT_LIST_HEAD(&tsp->smk_rules); INIT_LIST_HEAD(&tsp->smk_relabel); mutex_init(&tsp->smk_rules_lock); - - return tsp; } /** @@ -1881,14 +1872,7 @@ static int smack_file_open(struct file *file) */ static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp) { - struct task_smack *tsp; - - tsp = new_task_smack(NULL, NULL, gfp); - if (tsp == NULL) - return -ENOMEM; - - cred->security = tsp; - + init_task_smack(smack_cred(cred), NULL, NULL); return 0; } @@ -1905,10 +1889,6 @@ static void smack_cred_free(struct cred *cred) struct list_head *l; struct list_head *n; - if (tsp == NULL) - return; - cred->security = NULL; - smk_destroy_label_list(&tsp->smk_relabel); list_for_each_safe(l, n, &tsp->smk_rules) { @@ -1916,7 +1896,6 @@ static void smack_cred_free(struct cred *cred) list_del(&rp->list); kfree(rp); } - kfree(tsp); } /** @@ -1931,14 +1910,10 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { struct task_smack *old_tsp = smack_cred(old); - struct task_smack *new_tsp; + struct task_smack *new_tsp = smack_cred(new); int rc; - new_tsp = new_task_smack(old_tsp->smk_task, old_tsp->smk_task, gfp); - if (new_tsp == NULL) - return -ENOMEM; - - new->security = new_tsp; + init_task_smack(new_tsp, old_tsp->smk_task, old_tsp->smk_task); rc = smk_copy_rules(&new_tsp->smk_rules, &old_tsp->smk_rules, gfp); if (rc != 0) @@ -1946,10 +1921,7 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old, rc = smk_copy_relabel(&new_tsp->smk_relabel, &old_tsp->smk_relabel, gfp); - if (rc != 0) - return rc; - - return 0; + return rc; } /** @@ -4581,6 +4553,10 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, return 0; } +struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct task_smack), +}; + static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, smack_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), @@ -4758,20 +4734,25 @@ static __init void init_smack_known_list(void) */ static __init int smack_init(void) { - struct cred *cred; + struct cred *cred = (struct cred *) current->cred; struct task_smack *tsp; smack_inode_cache = KMEM_CACHE(inode_smack, 0); if (!smack_inode_cache) return -ENOMEM; - tsp = new_task_smack(&smack_known_floor, &smack_known_floor, - GFP_KERNEL); - if (tsp == NULL) { - kmem_cache_destroy(smack_inode_cache); - return -ENOMEM; - } + lsm_early_cred(cred); + /* + * Set the security state for the initial task. + */ + tsp = smack_cred(cred); + init_task_smack(tsp, &smack_known_floor, &smack_known_floor); + + /* + * Register with LSM + */ + security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack"); smack_enabled = 1; pr_info("Smack: Initializing.\n"); @@ -4785,20 +4766,9 @@ static __init int smack_init(void) pr_info("Smack: IPv6 Netfilter enabled.\n"); #endif - /* - * Set the security state for the initial task. - */ - cred = (struct cred *) current->cred; - cred->security = tsp; - /* initialize the smack_known_list */ init_smack_known_list(); - /* - * Register with LSM - */ - security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack"); - return 0; } @@ -4809,5 +4779,6 @@ static __init int smack_init(void) DEFINE_LSM(smack) = { .name = "smack", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, + .blobs = &smack_blob_sizes, .init = smack_init, }; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 41898613d93b..4fc17294a12d 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -1087,6 +1087,7 @@ extern struct tomoyo_domain_info tomoyo_kernel_domain; extern struct tomoyo_policy_namespace tomoyo_kernel_namespace; extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; +extern struct lsm_blob_sizes tomoyo_blob_sizes; /********** Inlined functions. **********/ @@ -1206,7 +1207,7 @@ static inline void tomoyo_put_group(struct tomoyo_group *group) */ static inline struct tomoyo_domain_info **tomoyo_cred(const struct cred *cred) { - return (struct tomoyo_domain_info **)&cred->security; + return cred->security + tomoyo_blob_sizes.lbs_cred; } /** diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 15864307925d..9094cf41a247 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -509,6 +509,10 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, return tomoyo_socket_sendmsg_permission(sock, msg, size); } +struct lsm_blob_sizes tomoyo_blob_sizes __lsm_ro_after_init = { + .lbs_cred = sizeof(struct tomoyo_domain_info *), +}; + /* * tomoyo_security_ops is a "struct security_operations" which is used for * registering TOMOYO. @@ -562,6 +566,7 @@ static int __init tomoyo_init(void) /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); + lsm_early_cred(cred); blob = tomoyo_cred(cred); *blob = &tomoyo_kernel_domain; tomoyo_mm_init(); @@ -573,5 +578,6 @@ DEFINE_LSM(tomoyo) = { .name = "tomoyo", .enabled = &tomoyo_enabled, .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, + .blobs = &tomoyo_blob_sizes, .init = tomoyo_init, }; -- cgit v1.2.3 From 33bf60cabcc7687b194a689b068b65e9ecd556be Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Mon, 12 Nov 2018 12:02:49 -0800 Subject: LSM: Infrastructure management of the file security Move management of the file->f_security blob out of the individual security modules and into the infrastructure. The modules no longer allocate or free the data, instead they tell the infrastructure how much space they require. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 1 + security/apparmor/include/file.h | 5 +++- security/apparmor/lsm.c | 19 +++++++------- security/security.c | 54 ++++++++++++++++++++++++++++++++++++--- security/selinux/hooks.c | 25 ++---------------- security/selinux/include/objsec.h | 2 +- security/smack/smack.h | 3 ++- security/smack/smack_lsm.c | 14 +--------- 8 files changed, 72 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index dd33666567bc..e8cef019b645 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2032,6 +2032,7 @@ struct security_hook_list { */ struct lsm_blob_sizes { int lbs_cred; + int lbs_file; }; /* diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 4c2c8ac8842f..8be09208cf7c 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -32,7 +32,10 @@ struct path; AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_LOCK | \ AA_EXEC_MMAP | AA_MAY_LINK) -#define file_ctx(X) ((struct aa_file_ctx *)(X)->f_security) +static inline struct aa_file_ctx *file_ctx(struct file *file) +{ + return file->f_security + apparmor_blob_sizes.lbs_file; +} /* struct aa_file_ctx - the AppArmor context the file was opened in * @lock: lock to update the ctx diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d5e4a384f205..6821187b06ad 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -434,21 +434,21 @@ static int apparmor_file_open(struct file *file) static int apparmor_file_alloc_security(struct file *file) { - int error = 0; - - /* freed by apparmor_file_free_security */ + struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); - file->f_security = aa_alloc_file_ctx(label, GFP_KERNEL); - if (!file_ctx(file)) - error = -ENOMEM; - end_current_label_crit_section(label); - return error; + spin_lock_init(&ctx->lock); + rcu_assign_pointer(ctx->label, aa_get_label(label)); + end_current_label_crit_section(label); + return 0; } static void apparmor_file_free_security(struct file *file) { - aa_free_file_ctx(file_ctx(file)); + struct aa_file_ctx *ctx = file_ctx(file); + + if (ctx) + aa_put_label(rcu_access_pointer(ctx->label)); } static int common_file_perm(const char *op, struct file *file, u32 mask) @@ -1156,6 +1156,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, */ struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct aa_task_ctx *), + .lbs_file = sizeof(struct aa_file_ctx), }; static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { diff --git a/security/security.c b/security/security.c index 09be8ce007a2..f32d7d2075c6 100644 --- a/security/security.c +++ b/security/security.c @@ -40,6 +40,8 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); +static struct kmem_cache *lsm_file_cache; + char *lsm_names; static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; @@ -158,6 +160,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) return; lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); + lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); } /* Prepare LSM for initialization. */ @@ -279,6 +282,15 @@ static void __init ordered_lsm_init(void) prepare_lsm(*lsm); init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); + init_debug("file blob size = %d\n", blob_sizes.lbs_file); + + /* + * Create any kmem_caches needed for blobs + */ + if (blob_sizes.lbs_file) + lsm_file_cache = kmem_cache_create("lsm_file_cache", + blob_sizes.lbs_file, 0, + SLAB_PANIC, NULL); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -448,6 +460,27 @@ void __init lsm_early_cred(struct cred *cred) panic("%s: Early cred alloc failed.\n", __func__); } +/** + * lsm_file_alloc - allocate a composite file blob + * @file: the file that needs a blob + * + * Allocate the file blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_file_alloc(struct file *file) +{ + if (!lsm_file_cache) { + file->f_security = NULL; + return 0; + } + + file->f_security = kmem_cache_zalloc(lsm_file_cache, GFP_KERNEL); + if (file->f_security == NULL) + return -ENOMEM; + return 0; +} + /* * Hook list operation macros. * @@ -1144,12 +1177,27 @@ int security_file_permission(struct file *file, int mask) int security_file_alloc(struct file *file) { - return call_int_hook(file_alloc_security, 0, file); + int rc = lsm_file_alloc(file); + + if (rc) + return rc; + rc = call_int_hook(file_alloc_security, 0, file); + if (unlikely(rc)) + security_file_free(file); + return rc; } void security_file_free(struct file *file) { + void *blob; + call_void_hook(file_free_security, file); + + blob = file->f_security; + if (blob) { + file->f_security = NULL; + kmem_cache_free(lsm_file_cache, blob); + } } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1267,7 +1315,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) return rc; rc = call_int_hook(cred_alloc_blank, 0, cred, gfp); - if (rc) + if (unlikely(rc)) security_cred_free(cred); return rc; } @@ -1288,7 +1336,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) return rc; rc = call_int_hook(cred_prepare, 0, new, old, gfp); - if (rc) + if (unlikely(rc)) security_cred_free(new); return rc; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 620be0367c0b..632813821da6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -146,7 +146,6 @@ static int __init checkreqprot_setup(char *str) __setup("checkreqprot=", checkreqprot_setup); static struct kmem_cache *sel_inode_cache; -static struct kmem_cache *file_security_cache; /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled @@ -378,27 +377,15 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct file_security_struct *fsec; + struct file_security_struct *fsec = selinux_file(file); u32 sid = current_sid(); - fsec = kmem_cache_zalloc(file_security_cache, GFP_KERNEL); - if (!fsec) - return -ENOMEM; - fsec->sid = sid; fsec->fown_sid = sid; - file->f_security = fsec; return 0; } -static void file_free_security(struct file *file) -{ - struct file_security_struct *fsec = selinux_file(file); - file->f_security = NULL; - kmem_cache_free(file_security_cache, fsec); -} - static int superblock_alloc_security(struct super_block *sb) { struct superblock_security_struct *sbsec; @@ -3345,11 +3332,6 @@ static int selinux_file_alloc_security(struct file *file) return file_alloc_security(file); } -static void selinux_file_free_security(struct file *file) -{ - file_free_security(file); -} - /* * Check whether a task has the ioctl permission and cmd * operation to an inode. @@ -6646,6 +6628,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), + .lbs_file = sizeof(struct file_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6717,7 +6700,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), - LSM_HOOK_INIT(file_free_security, selinux_file_free_security), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), @@ -6902,9 +6884,6 @@ static __init int selinux_init(void) sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), 0, SLAB_PANIC, NULL); - file_security_cache = kmem_cache_create("selinux_file_security", - sizeof(struct file_security_struct), - 0, SLAB_PANIC, NULL); avc_init(); avtab_cache_init(); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index e0ac2992e059..96374dbf4ace 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -167,7 +167,7 @@ static inline struct task_security_struct *selinux_cred(const struct cred *cred) static inline struct file_security_struct *selinux_file(const struct file *file) { - return file->f_security; + return file->f_security + selinux_blob_sizes.lbs_file; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 50854969a391..2007d38d0e46 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -364,7 +364,8 @@ static inline struct task_smack *smack_cred(const struct cred *cred) static inline struct smack_known **smack_file(const struct file *file) { - return (struct smack_known **)&file->f_security; + return (struct smack_known **)(file->f_security + + smack_blob_sizes.lbs_file); } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8f72641f94ab..7c76668ea3a6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1495,18 +1495,6 @@ static int smack_file_alloc_security(struct file *file) return 0; } -/** - * smack_file_free_security - clear a file security blob - * @file: the object - * - * The security blob for a file is a pointer to the master - * label list, so no memory is freed. - */ -static void smack_file_free_security(struct file *file) -{ - file->f_security = NULL; -} - /** * smack_file_ioctl - Smack check on ioctls * @file: the object @@ -4559,6 +4547,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), + .lbs_file = sizeof(struct smack_known *), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4595,7 +4584,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(inode_getsecid, smack_inode_getsecid), LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), - LSM_HOOK_INIT(file_free_security, smack_file_free_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), LSM_HOOK_INIT(file_lock, smack_file_lock), LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), -- cgit v1.2.3 From afb1cbe37440c7f38b9cf46fc331cc9dfd5cce21 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:19:29 -0700 Subject: LSM: Infrastructure management of the inode security Move management of the inode->i_security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 3 ++ security/security.c | 64 +++++++++++++++++++++++++++++++-- security/selinux/hooks.c | 37 ++++--------------- security/selinux/include/objsec.h | 9 +++-- security/smack/smack.h | 2 +- security/smack/smack_lsm.c | 76 +++++++++------------------------------ 6 files changed, 93 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e8cef019b645..1c798e842de2 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2033,6 +2033,7 @@ struct security_hook_list { struct lsm_blob_sizes { int lbs_cred; int lbs_file; + int lbs_inode; }; /* @@ -2104,6 +2105,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, #define __lsm_ro_after_init __ro_after_init #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ +extern int lsm_inode_alloc(struct inode *inode); + #ifdef CONFIG_SECURITY void __init lsm_early_cred(struct cred *cred); #endif diff --git a/security/security.c b/security/security.c index f32d7d2075c6..4989fb65e662 100644 --- a/security/security.c +++ b/security/security.c @@ -41,6 +41,7 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init; static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain); static struct kmem_cache *lsm_file_cache; +static struct kmem_cache *lsm_inode_cache; char *lsm_names; static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; @@ -161,6 +162,13 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); + /* + * The inode blob gets an rcu_head in addition to + * what the modules might need. + */ + if (needed->lbs_inode && blob_sizes.lbs_inode == 0) + blob_sizes.lbs_inode = sizeof(struct rcu_head); + lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); } /* Prepare LSM for initialization. */ @@ -283,6 +291,7 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); + init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); /* * Create any kmem_caches needed for blobs @@ -291,6 +300,10 @@ static void __init ordered_lsm_init(void) lsm_file_cache = kmem_cache_create("lsm_file_cache", blob_sizes.lbs_file, 0, SLAB_PANIC, NULL); + if (blob_sizes.lbs_inode) + lsm_inode_cache = kmem_cache_create("lsm_inode_cache", + blob_sizes.lbs_inode, 0, + SLAB_PANIC, NULL); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -481,6 +494,27 @@ static int lsm_file_alloc(struct file *file) return 0; } +/** + * lsm_inode_alloc - allocate a composite inode blob + * @inode: the inode that needs a blob + * + * Allocate the inode blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_inode_alloc(struct inode *inode) +{ + if (!lsm_inode_cache) { + inode->i_security = NULL; + return 0; + } + + inode->i_security = kmem_cache_zalloc(lsm_inode_cache, GFP_NOFS); + if (inode->i_security == NULL) + return -ENOMEM; + return 0; +} + /* * Hook list operation macros. * @@ -740,14 +774,40 @@ EXPORT_SYMBOL(security_add_mnt_opt); int security_inode_alloc(struct inode *inode) { - inode->i_security = NULL; - return call_int_hook(inode_alloc_security, 0, inode); + int rc = lsm_inode_alloc(inode); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(inode_alloc_security, 0, inode); + if (unlikely(rc)) + security_inode_free(inode); + return rc; +} + +static void inode_free_by_rcu(struct rcu_head *head) +{ + /* + * The rcu head is at the start of the inode blob + */ + kmem_cache_free(lsm_inode_cache, head); } void security_inode_free(struct inode *inode) { integrity_inode_free(inode); call_void_hook(inode_free_security, inode); + /* + * The inode may still be referenced in a path walk and + * a call to security_inode_permission() can be made + * after inode_free_security() is called. Ideally, the VFS + * wouldn't do this, but fixing that is a much harder + * job. For now, simply free the i_security via RCU, and + * leave the current inode->i_security pointer intact. + * The inode will be freed after the RCU grace period too. + */ + if (inode->i_security) + call_rcu((struct rcu_head *)inode->i_security, + inode_free_by_rcu); } int security_dentry_init_security(struct dentry *dentry, int mode, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2d691e8dfbbf..23da46cd6e37 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -145,8 +145,6 @@ static int __init checkreqprot_setup(char *str) } __setup("checkreqprot=", checkreqprot_setup); -static struct kmem_cache *sel_inode_cache; - /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled * @@ -242,13 +240,9 @@ static inline u32 task_sid(const struct task_struct *task) static int inode_alloc_security(struct inode *inode) { - struct inode_security_struct *isec; + struct inode_security_struct *isec = selinux_inode(inode); u32 sid = current_sid(); - isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); - if (!isec) - return -ENOMEM; - spin_lock_init(&isec->lock); INIT_LIST_HEAD(&isec->list); isec->inode = inode; @@ -256,7 +250,6 @@ static int inode_alloc_security(struct inode *inode) isec->sclass = SECCLASS_FILE; isec->task_sid = sid; isec->initialized = LABEL_INVALID; - inode->i_security = isec; return 0; } @@ -334,19 +327,14 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr return selinux_inode(inode); } -static void inode_free_rcu(struct rcu_head *head) -{ - struct inode_security_struct *isec; - - isec = container_of(head, struct inode_security_struct, rcu); - kmem_cache_free(sel_inode_cache, isec); -} - static void inode_free_security(struct inode *inode) { struct inode_security_struct *isec = selinux_inode(inode); - struct superblock_security_struct *sbsec = inode->i_sb->s_security; + struct superblock_security_struct *sbsec; + if (!isec) + return; + sbsec = inode->i_sb->s_security; /* * As not all inode security structures are in a list, we check for * empty list outside of the lock to make sure that we won't waste @@ -362,17 +350,6 @@ static void inode_free_security(struct inode *inode) list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); } - - /* - * The inode may still be referenced in a path walk and - * a call to selinux_inode_permission() can be made - * after inode_free_security() is called. Ideally, the VFS - * wouldn't do this, but fixing that is a much harder - * job. For now, simply free the i_security via RCU, and - * leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - call_rcu(&isec->rcu, inode_free_rcu); } static int file_alloc_security(struct file *file) @@ -6629,6 +6606,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), + .lbs_inode = sizeof(struct inode_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6881,9 +6859,6 @@ static __init int selinux_init(void) default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC); - sel_inode_cache = kmem_cache_create("selinux_inode_security", - sizeof(struct inode_security_struct), - 0, SLAB_PANIC, NULL); avc_init(); avtab_cache_init(); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 26b4ff6b4d81..562fad58c56b 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -57,10 +57,7 @@ enum label_initialized { struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ - union { - struct list_head list; /* list of inode_security_struct */ - struct rcu_head rcu; /* for freeing the inode_security_struct */ - }; + struct list_head list; /* list of inode_security_struct */ u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ @@ -173,7 +170,9 @@ static inline struct file_security_struct *selinux_file(const struct file *file) static inline struct inode_security_struct *selinux_inode( const struct inode *inode) { - return inode->i_security; + if (unlikely(!inode->i_security)) + return NULL; + return inode->i_security + selinux_blob_sizes.lbs_inode; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 436231dfae33..bf0abc35ca1c 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -370,7 +370,7 @@ static inline struct smack_known **smack_file(const struct file *file) static inline struct inode_smack *smack_inode(const struct inode *inode) { - return inode->i_security; + return inode->i_security + smack_blob_sizes.lbs_inode; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ddffda39d107..804897c82810 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -305,24 +305,18 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip, } /** - * new_inode_smack - allocate an inode security blob + * init_inode_smack - initialize an inode security blob + * @isp: the blob to initialize * @skp: a pointer to the Smack label entry to use in the blob * - * Returns the new blob or NULL if there's no memory available */ -static struct inode_smack *new_inode_smack(struct smack_known *skp) +static void init_inode_smack(struct inode *inode, struct smack_known *skp) { - struct inode_smack *isp; - - isp = kmem_cache_zalloc(smack_inode_cache, GFP_NOFS); - if (isp == NULL) - return NULL; + struct inode_smack *isp = smack_inode(inode); isp->smk_inode = skp; isp->smk_flags = 0; mutex_init(&isp->smk_lock); - - return isp; } /** @@ -709,6 +703,13 @@ static int smack_set_mnt_opts(struct super_block *sb, if (sp->smk_flags & SMK_SB_INITIALIZED) return 0; + if (inode->i_security == NULL) { + int rc = lsm_inode_alloc(inode); + + if (rc) + return rc; + } + if (!smack_privileged(CAP_MAC_ADMIN)) { /* * Unprivileged mounts don't get to specify Smack values. @@ -773,17 +774,12 @@ static int smack_set_mnt_opts(struct super_block *sb, /* * Initialize the root inode. */ - isp = smack_inode(inode); - if (isp == NULL) { - isp = new_inode_smack(sp->smk_root); - if (isp == NULL) - return -ENOMEM; - inode->i_security = isp; - } else - isp->smk_inode = sp->smk_root; + init_inode_smack(inode, sp->smk_root); - if (transmute) + if (transmute) { + isp = smack_inode(inode); isp->smk_flags |= SMK_INODE_TRANSMUTE; + } return 0; } @@ -881,48 +877,10 @@ static int smack_inode_alloc_security(struct inode *inode) { struct smack_known *skp = smk_of_current(); - inode->i_security = new_inode_smack(skp); - if (inode->i_security == NULL) - return -ENOMEM; + init_inode_smack(inode, skp); return 0; } -/** - * smack_inode_free_rcu - Free inode_smack blob from cache - * @head: the rcu_head for getting inode_smack pointer - * - * Call back function called from call_rcu() to free - * the i_security blob pointer in inode - */ -static void smack_inode_free_rcu(struct rcu_head *head) -{ - struct inode_smack *issp; - - issp = container_of(head, struct inode_smack, smk_rcu); - kmem_cache_free(smack_inode_cache, issp); -} - -/** - * smack_inode_free_security - free an inode blob using call_rcu() - * @inode: the inode with a blob - * - * Clears the blob pointer in inode using RCU - */ -static void smack_inode_free_security(struct inode *inode) -{ - struct inode_smack *issp = smack_inode(inode); - - /* - * The inode may still be referenced in a path walk and - * a call to smack_inode_permission() can be made - * after smack_inode_free_security() is called. - * To avoid race condition free the i_security via RCU - * and leave the current inode->i_security pointer intact. - * The inode will be freed after the RCU grace period too. - */ - call_rcu(&issp->smk_rcu, smack_inode_free_rcu); -} - /** * smack_inode_init_security - copy out the smack from an inode * @inode: the newly created inode @@ -4548,6 +4506,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), + .lbs_inode = sizeof(struct inode_smack), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4565,7 +4524,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_set_creds, smack_bprm_set_creds), LSM_HOOK_INIT(inode_alloc_security, smack_inode_alloc_security), - LSM_HOOK_INIT(inode_free_security, smack_inode_free_security), LSM_HOOK_INIT(inode_init_security, smack_inode_init_security), LSM_HOOK_INIT(inode_link, smack_inode_link), LSM_HOOK_INIT(inode_unlink, smack_inode_unlink), -- cgit v1.2.3 From f4ad8f2c40769b3cc9497ba0883bbaf823f7752f Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 21 Sep 2018 17:19:37 -0700 Subject: LSM: Infrastructure management of the task security Move management of the task_struct->security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. The only user of this blob is AppArmor. The AppArmor use is abstracted to avoid future conflict. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 ++ security/apparmor/include/task.h | 18 +++----------- security/apparmor/lsm.c | 15 +++-------- security/security.c | 54 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 1c798e842de2..9b39fefa88c4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2034,6 +2034,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_task; }; /* @@ -2109,6 +2110,7 @@ extern int lsm_inode_alloc(struct inode *inode); #ifdef CONFIG_SECURITY void __init lsm_early_cred(struct cred *cred); +void __init lsm_early_task(struct task_struct *task); #endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/include/task.h b/security/apparmor/include/task.h index 55edaa1d83f8..039c1e60887a 100644 --- a/security/apparmor/include/task.h +++ b/security/apparmor/include/task.h @@ -14,7 +14,10 @@ #ifndef __AA_TASK_H #define __AA_TASK_H -#define task_ctx(X) ((X)->security) +static inline struct aa_task_ctx *task_ctx(struct task_struct *task) +{ + return task->security; +} /* * struct aa_task_ctx - information for current task label change @@ -36,17 +39,6 @@ int aa_set_current_hat(struct aa_label *label, u64 token); int aa_restore_previous_label(u64 cookie); struct aa_label *aa_get_task_label(struct task_struct *task); -/** - * aa_alloc_task_ctx - allocate a new task_ctx - * @flags: gfp flags for allocation - * - * Returns: allocated buffer or NULL on failure - */ -static inline struct aa_task_ctx *aa_alloc_task_ctx(gfp_t flags) -{ - return kzalloc(sizeof(struct aa_task_ctx), flags); -} - /** * aa_free_task_ctx - free a task_ctx * @ctx: task_ctx to free (MAYBE NULL) @@ -57,8 +49,6 @@ static inline void aa_free_task_ctx(struct aa_task_ctx *ctx) aa_put_label(ctx->nnp); aa_put_label(ctx->previous); aa_put_label(ctx->onexec); - - kzfree(ctx); } } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 6821187b06ad..60ef71268ccf 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -94,19 +94,14 @@ static void apparmor_task_free(struct task_struct *task) { aa_free_task_ctx(task_ctx(task)); - task_ctx(task) = NULL; } static int apparmor_task_alloc(struct task_struct *task, unsigned long clone_flags) { - struct aa_task_ctx *new = aa_alloc_task_ctx(GFP_KERNEL); - - if (!new) - return -ENOMEM; + struct aa_task_ctx *new = task_ctx(task); aa_dup_task_ctx(new, task_ctx(current)); - task_ctx(task) = new; return 0; } @@ -1157,6 +1152,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct aa_task_ctx *), .lbs_file = sizeof(struct aa_file_ctx), + .lbs_task = sizeof(struct aa_task_ctx), }; static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { @@ -1487,15 +1483,10 @@ static int param_set_mode(const char *val, const struct kernel_param *kp) static int __init set_init_ctx(void) { struct cred *cred = (struct cred *)current->real_cred; - struct aa_task_ctx *ctx; - - ctx = aa_alloc_task_ctx(GFP_KERNEL); - if (!ctx) - return -ENOMEM; lsm_early_cred(cred); + lsm_early_task(current); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); - task_ctx(current) = ctx; return 0; } diff --git a/security/security.c b/security/security.c index 4989fb65e662..e59a1e1514ee 100644 --- a/security/security.c +++ b/security/security.c @@ -169,6 +169,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) if (needed->lbs_inode && blob_sizes.lbs_inode == 0) blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); + lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); } /* Prepare LSM for initialization. */ @@ -292,6 +293,7 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); + init_debug("task blob size = %d\n", blob_sizes.lbs_task); /* * Create any kmem_caches needed for blobs @@ -515,6 +517,46 @@ int lsm_inode_alloc(struct inode *inode) return 0; } +/** + * lsm_task_alloc - allocate a composite task blob + * @task: the task that needs a blob + * + * Allocate the task blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_task_alloc(struct task_struct *task) +{ + if (blob_sizes.lbs_task == 0) { + task->security = NULL; + return 0; + } + + task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL); + if (task->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_early_task - during initialization allocate a composite task blob + * @task: the task that needs a blob + * + * Allocate the task blob for all the modules if it's not already there + */ +void __init lsm_early_task(struct task_struct *task) +{ + int rc; + + if (task == NULL) + panic("%s: task cred.\n", __func__); + if (task->security != NULL) + return; + rc = lsm_task_alloc(task); + if (rc) + panic("%s: Early task alloc failed.\n", __func__); +} + /* * Hook list operation macros. * @@ -1359,12 +1401,22 @@ int security_file_open(struct file *file) int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { - return call_int_hook(task_alloc, 0, task, clone_flags); + int rc = lsm_task_alloc(task); + + if (rc) + return rc; + rc = call_int_hook(task_alloc, 0, task, clone_flags); + if (unlikely(rc)) + security_task_free(task); + return rc; } void security_task_free(struct task_struct *task) { call_void_hook(task_free, task); + + kfree(task->security); + task->security = NULL; } int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) -- cgit v1.2.3 From ecd5f82e05ddd9b06c258167ec7467ac79741d77 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 20 Nov 2018 11:55:02 -0800 Subject: LSM: Infrastructure management of the ipc security blob Move management of the kern_ipc_perm->security and msg_msg->security blobs out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules the modules tell the infrastructure how much space is required, and the space is allocated there. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook [kees: adjusted for ordered init series] Signed-off-by: Kees Cook --- include/linux/lsm_hooks.h | 2 + security/security.c | 91 ++++++++++++++++++++++++++++++++++-- security/selinux/hooks.c | 98 ++++++--------------------------------- security/selinux/include/objsec.h | 4 +- security/smack/smack.h | 4 +- security/smack/smack_lsm.c | 32 ++----------- 6 files changed, 110 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9b39fefa88c4..40511a8a5ae6 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2034,6 +2034,8 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_ipc; + int lbs_msg_msg; int lbs_task; }; diff --git a/security/security.c b/security/security.c index e59a1e1514ee..953fc3ea18a9 100644 --- a/security/security.c +++ b/security/security.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -169,6 +170,8 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) if (needed->lbs_inode && blob_sizes.lbs_inode == 0) blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); + lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); + lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); } @@ -293,6 +296,8 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); + init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); + init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); init_debug("task blob size = %d\n", blob_sizes.lbs_task); /* @@ -538,6 +543,48 @@ int lsm_task_alloc(struct task_struct *task) return 0; } +/** + * lsm_ipc_alloc - allocate a composite ipc blob + * @kip: the ipc that needs a blob + * + * Allocate the ipc blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_ipc_alloc(struct kern_ipc_perm *kip) +{ + if (blob_sizes.lbs_ipc == 0) { + kip->security = NULL; + return 0; + } + + kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL); + if (kip->security == NULL) + return -ENOMEM; + return 0; +} + +/** + * lsm_msg_msg_alloc - allocate a composite msg_msg blob + * @mp: the msg_msg that needs a blob + * + * Allocate the ipc blob for all the modules + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +int lsm_msg_msg_alloc(struct msg_msg *mp) +{ + if (blob_sizes.lbs_msg_msg == 0) { + mp->security = NULL; + return 0; + } + + mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL); + if (mp->security == NULL) + return -ENOMEM; + return 0; +} + /** * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob @@ -1631,22 +1678,40 @@ void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) int security_msg_msg_alloc(struct msg_msg *msg) { - return call_int_hook(msg_msg_alloc_security, 0, msg); + int rc = lsm_msg_msg_alloc(msg); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(msg_msg_alloc_security, 0, msg); + if (unlikely(rc)) + security_msg_msg_free(msg); + return rc; } void security_msg_msg_free(struct msg_msg *msg) { call_void_hook(msg_msg_free_security, msg); + kfree(msg->security); + msg->security = NULL; } int security_msg_queue_alloc(struct kern_ipc_perm *msq) { - return call_int_hook(msg_queue_alloc_security, 0, msq); + int rc = lsm_ipc_alloc(msq); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(msg_queue_alloc_security, 0, msq); + if (unlikely(rc)) + security_msg_queue_free(msq); + return rc; } void security_msg_queue_free(struct kern_ipc_perm *msq) { call_void_hook(msg_queue_free_security, msq); + kfree(msq->security); + msq->security = NULL; } int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) @@ -1673,12 +1738,21 @@ int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, int security_shm_alloc(struct kern_ipc_perm *shp) { - return call_int_hook(shm_alloc_security, 0, shp); + int rc = lsm_ipc_alloc(shp); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(shm_alloc_security, 0, shp); + if (unlikely(rc)) + security_shm_free(shp); + return rc; } void security_shm_free(struct kern_ipc_perm *shp) { call_void_hook(shm_free_security, shp); + kfree(shp->security); + shp->security = NULL; } int security_shm_associate(struct kern_ipc_perm *shp, int shmflg) @@ -1698,12 +1772,21 @@ int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmf int security_sem_alloc(struct kern_ipc_perm *sma) { - return call_int_hook(sem_alloc_security, 0, sma); + int rc = lsm_ipc_alloc(sma); + + if (unlikely(rc)) + return rc; + rc = call_int_hook(sem_alloc_security, 0, sma); + if (unlikely(rc)) + security_sem_free(sma); + return rc; } void security_sem_free(struct kern_ipc_perm *sma) { call_void_hook(sem_free_security, sma); + kfree(sma->security); + sma->security = NULL; } int security_sem_associate(struct kern_ipc_perm *sma, int semflg) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4b64ad31326f..d98e1d8d18f6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5626,51 +5626,22 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) return selinux_nlmsg_perm(sk, skb); } -static int ipc_alloc_security(struct kern_ipc_perm *perm, - u16 sclass) +static void ipc_init_security(struct ipc_security_struct *isec, u16 sclass) { - struct ipc_security_struct *isec; - - isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); - if (!isec) - return -ENOMEM; - isec->sclass = sclass; isec->sid = current_sid(); - perm->security = isec; - - return 0; -} - -static void ipc_free_security(struct kern_ipc_perm *perm) -{ - struct ipc_security_struct *isec = perm->security; - perm->security = NULL; - kfree(isec); } static int msg_msg_alloc_security(struct msg_msg *msg) { struct msg_security_struct *msec; - msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL); - if (!msec) - return -ENOMEM; - + msec = selinux_msg_msg(msg); msec->sid = SECINITSID_UNLABELED; - msg->security = msec; return 0; } -static void msg_msg_free_security(struct msg_msg *msg) -{ - struct msg_security_struct *msec = msg->security; - - msg->security = NULL; - kfree(msec); -} - static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, u32 perms) { @@ -5692,11 +5663,6 @@ static int selinux_msg_msg_alloc_security(struct msg_msg *msg) return msg_msg_alloc_security(msg); } -static void selinux_msg_msg_free_security(struct msg_msg *msg) -{ - msg_msg_free_security(msg); -} - /* message queue security operations */ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) { @@ -5705,11 +5671,8 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(msq, SECCLASS_MSGQ); - if (rc) - return rc; - - isec = msq->security; + isec = selinux_ipc(msq); + ipc_init_security(isec, SECCLASS_MSGQ); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->key; @@ -5717,16 +5680,7 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_MSGQ, MSGQ__CREATE, &ad); - if (rc) { - ipc_free_security(msq); - return rc; - } - return 0; -} - -static void selinux_msg_queue_free_security(struct kern_ipc_perm *msq) -{ - ipc_free_security(msq); + return rc; } static int selinux_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) @@ -5856,11 +5810,8 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(shp, SECCLASS_SHM); - if (rc) - return rc; - - isec = shp->security; + isec = selinux_ipc(shp); + ipc_init_security(isec, SECCLASS_SHM); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->key; @@ -5868,16 +5819,7 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_SHM, SHM__CREATE, &ad); - if (rc) { - ipc_free_security(shp); - return rc; - } - return 0; -} - -static void selinux_shm_free_security(struct kern_ipc_perm *shp) -{ - ipc_free_security(shp); + return rc; } static int selinux_shm_associate(struct kern_ipc_perm *shp, int shmflg) @@ -5953,11 +5895,8 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) u32 sid = current_sid(); int rc; - rc = ipc_alloc_security(sma, SECCLASS_SEM); - if (rc) - return rc; - - isec = sma->security; + isec = selinux_ipc(sma); + ipc_init_security(isec, SECCLASS_SEM); ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->key; @@ -5965,16 +5904,7 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) rc = avc_has_perm(&selinux_state, sid, isec->sid, SECCLASS_SEM, SEM__CREATE, &ad); - if (rc) { - ipc_free_security(sma); - return rc; - } - return 0; -} - -static void selinux_sem_free_security(struct kern_ipc_perm *sma) -{ - ipc_free_security(sma); + return rc; } static int selinux_sem_associate(struct kern_ipc_perm *sma, int semflg) @@ -6607,6 +6537,8 @@ struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), .lbs_inode = sizeof(struct inode_security_struct), + .lbs_ipc = sizeof(struct ipc_security_struct), + .lbs_msg_msg = sizeof(struct msg_security_struct), }; static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { @@ -6718,24 +6650,20 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), LSM_HOOK_INIT(msg_msg_alloc_security, selinux_msg_msg_alloc_security), - LSM_HOOK_INIT(msg_msg_free_security, selinux_msg_msg_free_security), LSM_HOOK_INIT(msg_queue_alloc_security, selinux_msg_queue_alloc_security), - LSM_HOOK_INIT(msg_queue_free_security, selinux_msg_queue_free_security), LSM_HOOK_INIT(msg_queue_associate, selinux_msg_queue_associate), LSM_HOOK_INIT(msg_queue_msgctl, selinux_msg_queue_msgctl), LSM_HOOK_INIT(msg_queue_msgsnd, selinux_msg_queue_msgsnd), LSM_HOOK_INIT(msg_queue_msgrcv, selinux_msg_queue_msgrcv), LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security), - LSM_HOOK_INIT(shm_free_security, selinux_shm_free_security), LSM_HOOK_INIT(shm_associate, selinux_shm_associate), LSM_HOOK_INIT(shm_shmctl, selinux_shm_shmctl), LSM_HOOK_INIT(shm_shmat, selinux_shm_shmat), LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security), - LSM_HOOK_INIT(sem_free_security, selinux_sem_free_security), LSM_HOOK_INIT(sem_associate, selinux_sem_associate), LSM_HOOK_INIT(sem_semctl, selinux_sem_semctl), LSM_HOOK_INIT(sem_semop, selinux_sem_semop), diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 539cacf4a572..231262d8eac9 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -179,13 +179,13 @@ static inline struct inode_security_struct *selinux_inode( static inline struct msg_security_struct *selinux_msg_msg( const struct msg_msg *msg_msg) { - return msg_msg->security; + return msg_msg->security + selinux_blob_sizes.lbs_msg_msg; } static inline struct ipc_security_struct *selinux_ipc( const struct kern_ipc_perm *ipc) { - return ipc->security; + return ipc->security + selinux_blob_sizes.lbs_ipc; } #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/smack/smack.h b/security/smack/smack.h index 0adddbeecc62..9c7c95a5c497 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -376,12 +376,12 @@ static inline struct inode_smack *smack_inode(const struct inode *inode) static inline struct smack_known **smack_msg_msg(const struct msg_msg *msg) { - return (struct smack_known **)&msg->security; + return msg->security + smack_blob_sizes.lbs_msg_msg; } static inline struct smack_known **smack_ipc(const struct kern_ipc_perm *ipc) { - return (struct smack_known **)&ipc->security; + return ipc->security + smack_blob_sizes.lbs_ipc; } /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 154521b6843b..0b848b1f6366 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2809,23 +2809,12 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - struct smack_known *skp = smk_of_current(); + struct smack_known **blob = smack_msg_msg(msg); - msg->security = skp; + *blob = smk_of_current(); return 0; } -/** - * smack_msg_msg_free_security - Clear the security blob for msg_msg - * @msg: the object - * - * Clears the blob pointer - */ -static void smack_msg_msg_free_security(struct msg_msg *msg) -{ - msg->security = NULL; -} - /** * smack_of_ipc - the smack pointer for the ipc * @isp: the object @@ -2853,17 +2842,6 @@ static int smack_ipc_alloc_security(struct kern_ipc_perm *isp) return 0; } -/** - * smack_ipc_free_security - Clear the security blob for ipc - * @isp: the object - * - * Clears the blob pointer - */ -static void smack_ipc_free_security(struct kern_ipc_perm *isp) -{ - isp->security = NULL; -} - /** * smk_curacc_shm : check if current has access on shm * @isp : the object @@ -4511,6 +4489,8 @@ struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), .lbs_inode = sizeof(struct inode_smack), + .lbs_ipc = sizeof(struct smack_known *), + .lbs_msg_msg = sizeof(struct smack_known *), }; static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { @@ -4581,23 +4561,19 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ipc_getsecid, smack_ipc_getsecid), LSM_HOOK_INIT(msg_msg_alloc_security, smack_msg_msg_alloc_security), - LSM_HOOK_INIT(msg_msg_free_security, smack_msg_msg_free_security), LSM_HOOK_INIT(msg_queue_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(msg_queue_free_security, smack_ipc_free_security), LSM_HOOK_INIT(msg_queue_associate, smack_msg_queue_associate), LSM_HOOK_INIT(msg_queue_msgctl, smack_msg_queue_msgctl), LSM_HOOK_INIT(msg_queue_msgsnd, smack_msg_queue_msgsnd), LSM_HOOK_INIT(msg_queue_msgrcv, smack_msg_queue_msgrcv), LSM_HOOK_INIT(shm_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(shm_free_security, smack_ipc_free_security), LSM_HOOK_INIT(shm_associate, smack_shm_associate), LSM_HOOK_INIT(shm_shmctl, smack_shm_shmctl), LSM_HOOK_INIT(shm_shmat, smack_shm_shmat), LSM_HOOK_INIT(sem_alloc_security, smack_ipc_alloc_security), - LSM_HOOK_INIT(sem_free_security, smack_ipc_free_security), LSM_HOOK_INIT(sem_associate, smack_sem_associate), LSM_HOOK_INIT(sem_semctl, smack_sem_semctl), LSM_HOOK_INIT(sem_semop, smack_sem_semop), -- cgit v1.2.3 From 0ada768517dafa1504ef5986ba04f118b7436960 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jan 2019 16:07:27 +0200 Subject: RDMA/mlx5: Delete declaration of already removed function The implementation of mlx5_core_page_fault_resume() was removed in commit d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA"). This patch removes declaration too. Fixes: d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..b6f5839f129a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -939,10 +939,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error); -#endif int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); -- cgit v1.2.3 From f3186dd876697e696d07136623d5cf0a6fb0bc0f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Jan 2019 16:51:50 +0100 Subject: spi: Optionally use GPIO descriptors for CS GPIOs This augments the SPI core to optionally use GPIO descriptors for chip select on a per-master-driver opt-in basis. Drivers using this will rely on the SPI core to look up GPIO descriptors associated with the device, such as when using device tree or board files with GPIO descriptor tables. When getting descriptors from the device tree, this will in turn activate the code in gpiolib that was added in commit 6953c57ab172 ("gpio: of: Handle SPI chipselect legacy bindings") which means that these descriptors are aware of the active low semantics that is the default for SPI CS GPIO lines and we can assume that all of these are "active high" and thus assign SPI_CS_HIGH to all CS lines on the DT path. The previously used gpio_set_value() would call down into gpiod_set_raw_value() and ignore the polarity inversion semantics. It seems like many drivers go to great lengths to set up the CS GPIO line as non-asserted, respecting SPI_CS_HIGH. We pull this out of the SPI drivers and into the core, and by simply requesting the line as GPIOD_OUT_LOW when retrieveing it from the device and relying on the gpiolib to handle any inversion semantics. This way a lot of code can be simplified and removed in each converted driver. The end goal after dealing with each driver in turn, is to delete the non-descriptor path (of_spi_register_master() for example) and let the core deal with only descriptors. The different SPI drivers have complex interactions with the core so we cannot simply change them all over, we need to use a stepwise, bisectable approach so that each driver can be converted and fixed in isolation. This patch has the intended side effect of adding support for ACPI GPIOs as it starts relying on gpiod_get_*() to get the GPIO handle associated with the device. Cc: Linuxarm Acked-by: Jonathan Cameron Tested-by: Fangjian (Turing) Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/spi/spi.c | 104 +++++++++++++++++++++++++++++++++++++++++++----- include/linux/spi/spi.h | 23 +++++++++-- 2 files changed, 113 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9a7def7c3237..13f447a67d67 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -578,7 +579,10 @@ int spi_add_device(struct spi_device *spi) goto done; } - if (ctlr->cs_gpios) + /* Descriptors take precedence */ + if (ctlr->cs_gpiods) + spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select]; + else if (ctlr->cs_gpios) spi->cs_gpio = ctlr->cs_gpios[spi->chip_select]; /* Drivers may modify this initial i/o setup, but will @@ -772,10 +776,20 @@ static void spi_set_cs(struct spi_device *spi, bool enable) if (spi->mode & SPI_CS_HIGH) enable = !enable; - if (gpio_is_valid(spi->cs_gpio)) { - /* Honour the SPI_NO_CS flag */ - if (!(spi->mode & SPI_NO_CS)) - gpio_set_value(spi->cs_gpio, !enable); + if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) { + /* + * Honour the SPI_NO_CS flag and invert the enable line, as + * active low is default for SPI. Execution paths that handle + * polarity inversion in gpiolib (such as device tree) will + * enforce active high using the SPI_CS_HIGH resulting in a + * double inversion through the code above. + */ + if (!(spi->mode & SPI_NO_CS)) { + if (spi->cs_gpiod) + gpiod_set_value(spi->cs_gpiod, !enable); + else + gpio_set_value(spi->cs_gpio, !enable); + } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_MASTER_GPIO_SS) && spi->controller->set_cs) @@ -1615,13 +1629,21 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, spi->mode |= SPI_CPHA; if (of_property_read_bool(nc, "spi-cpol")) spi->mode |= SPI_CPOL; - if (of_property_read_bool(nc, "spi-cs-high")) - spi->mode |= SPI_CS_HIGH; if (of_property_read_bool(nc, "spi-3wire")) spi->mode |= SPI_3WIRE; if (of_property_read_bool(nc, "spi-lsb-first")) spi->mode |= SPI_LSB_FIRST; + /* + * For descriptors associated with the device, polarity inversion is + * handled in the gpiolib, so all chip selects are "active high" in + * the logical sense, the gpiolib will invert the line if need be. + */ + if (ctlr->use_gpio_descriptors) + spi->mode |= SPI_CS_HIGH; + else if (of_property_read_bool(nc, "spi-cs-high")) + spi->mode |= SPI_CS_HIGH; + /* Device DUAL/QUAD mode */ if (!of_property_read_u32(nc, "spi-tx-bus-width", &value)) { switch (value) { @@ -2137,6 +2159,60 @@ static int of_spi_register_master(struct spi_controller *ctlr) } #endif +/** + * spi_get_gpio_descs() - grab chip select GPIOs for the master + * @ctlr: The SPI master to grab GPIO descriptors for + */ +static int spi_get_gpio_descs(struct spi_controller *ctlr) +{ + int nb, i; + struct gpio_desc **cs; + struct device *dev = &ctlr->dev; + + nb = gpiod_count(dev, "cs"); + ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect); + + /* No GPIOs at all is fine, else return the error */ + if (nb == 0 || nb == -ENOENT) + return 0; + else if (nb < 0) + return nb; + + cs = devm_kcalloc(dev, ctlr->num_chipselect, sizeof(*cs), + GFP_KERNEL); + if (!cs) + return -ENOMEM; + ctlr->cs_gpiods = cs; + + for (i = 0; i < nb; i++) { + /* + * Most chipselects are active low, the inverted + * semantics are handled by special quirks in gpiolib, + * so initializing them GPIOD_OUT_LOW here means + * "unasserted", in most cases this will drive the physical + * line high. + */ + cs[i] = devm_gpiod_get_index_optional(dev, "cs", i, + GPIOD_OUT_LOW); + + if (cs[i]) { + /* + * If we find a CS GPIO, name it after the device and + * chip select line. + */ + char *gpioname; + + gpioname = devm_kasprintf(dev, GFP_KERNEL, "%s CS%d", + dev_name(dev), i); + if (!gpioname) + return -ENOMEM; + gpiod_set_consumer_name(cs[i], gpioname); + } + } + + return 0; +} + static int spi_controller_check_ops(struct spi_controller *ctlr) { /* @@ -2199,9 +2275,16 @@ int spi_register_controller(struct spi_controller *ctlr) return status; if (!spi_controller_is_slave(ctlr)) { - status = of_spi_register_master(ctlr); - if (status) - return status; + if (ctlr->use_gpio_descriptors) { + status = spi_get_gpio_descs(ctlr); + if (status) + return status; + } else { + /* Legacy code path for GPIOs from DT */ + status = of_spi_register_master(ctlr); + if (status) + return status; + } } /* even if it's just one always-selected device, there must @@ -2915,6 +2998,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) * cs_change is set for each transfer. */ if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) || + spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) { size_t maxsize; int ret; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 314d922ca607..916bba47d156 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -12,6 +12,7 @@ #include #include #include +#include struct dma_chan; struct property_entry; @@ -116,7 +117,10 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when + * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when + * not using a GPIO line) use cs_gpiod in new drivers by opting in on + * the spi_master. + * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * * @statistics: statistics for the spi_device @@ -163,7 +167,8 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - int cs_gpio; /* chip select gpio */ + int cs_gpio; /* LEGACY: chip select gpio */ + struct gpio_desc *cs_gpiod; /* chip select gpio desc */ /* the statistics */ struct spi_statistics statistics; @@ -376,9 +381,17 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * controller has native support for memory like operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller - * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS - * number. Any individual value may be -ENOENT for CS lines that + * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per + * CS number. Any individual value may be -ENOENT for CS lines that + * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods + * in new drivers. + * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS + * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). + * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab + * GPIO descriptors rather than using global GPIO numbers grabbed by the + * driver. This will fill in @cs_gpiods and @cs_gpios should not be used, + * and SPI devices will have the cs_gpiod assigned rather than cs_gpio. * @statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel @@ -557,6 +570,8 @@ struct spi_controller { /* gpio chip select */ int *cs_gpios; + struct gpio_desc **cs_gpiods; + bool use_gpio_descriptors; /* statistics */ struct spi_statistics statistics; -- cgit v1.2.3 From 5e6acc3e678ed3db746ab4fb53a980861cd711b6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Dec 2018 15:51:47 -0800 Subject: bcm2835-pm: Move bcm2835-watchdog's DT probe to an MFD. The PM block that the wdt driver was binding to actually has multiple features we want to expose (power domains, reset, watchdog). Move the DT attachment to a MFD driver and make WDT probe against MFD. Signed-off-by: Eric Anholt Reviewed-by: Guenter Roeck Acked-by: Stefan Wahren Signed-off-by: Stefan Wahren --- arch/arm/mach-bcm/Kconfig | 1 + drivers/mfd/Makefile | 1 + drivers/mfd/bcm2835-pm.c | 64 ++++++++++++++++++++++++++++++++++++++++++ drivers/watchdog/bcm2835_wdt.c | 26 ++++++----------- include/linux/mfd/bcm2835-pm.h | 13 +++++++++ 5 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 drivers/mfd/bcm2835-pm.c create mode 100644 include/linux/mfd/bcm2835-pm.h (limited to 'include/linux') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index a067adf9f1ee..4ef1e55f4a0b 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -167,6 +167,7 @@ config ARCH_BCM2835 select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 + select MFD_CORE help This enables support for the Broadcom BCM2835 and BCM2836 SoCs. This SoC is used in the Raspberry Pi and Roku 2 devices. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 12980a4ad460..ee6fb6af655e 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_ACT8945A) += act8945a.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +obj-$(CONFIG_ARCH_BCM2835) += bcm2835-pm.o obj-$(CONFIG_MFD_BCM590XX) += bcm590xx.o obj-$(CONFIG_MFD_BD9571MWV) += bd9571mwv.o cros_ec_core-objs := cros_ec.o diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c new file mode 100644 index 000000000000..53839e6a81e7 --- /dev/null +++ b/drivers/mfd/bcm2835-pm.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PM MFD driver for Broadcom BCM2835 + * + * This driver binds to the PM block and creates the MFD device for + * the WDT driver. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct mfd_cell bcm2835_pm_devs[] = { + { .name = "bcm2835-wdt" }, +}; + +static int bcm2835_pm_probe(struct platform_device *pdev) +{ + struct resource *res; + struct device *dev = &pdev->dev; + struct bcm2835_pm *pm; + + pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL); + if (!pm) + return -ENOMEM; + platform_set_drvdata(pdev, pm); + + pm->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pm->base = devm_ioremap_resource(dev, res); + if (IS_ERR(pm->base)) + return PTR_ERR(pm->base); + + return devm_mfd_add_devices(dev, -1, + bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), + NULL, 0, NULL); +} + +static const struct of_device_id bcm2835_pm_of_match[] = { + { .compatible = "brcm,bcm2835-pm-wdt", }, + {}, +}; +MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match); + +static struct platform_driver bcm2835_pm_driver = { + .probe = bcm2835_pm_probe, + .driver = { + .name = "bcm2835-pm", + .of_match_table = bcm2835_pm_of_match, + }, +}; +module_platform_driver(bcm2835_pm_driver); + +MODULE_AUTHOR("Eric Anholt "); +MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM MFD"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index ed05514cc2dc..1834524ae373 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -47,6 +48,8 @@ struct bcm2835_wdt { spinlock_t lock; }; +static struct bcm2835_wdt *bcm2835_power_off_wdt; + static unsigned int heartbeat; static bool nowayout = WATCHDOG_NOWAYOUT; @@ -148,10 +151,7 @@ static struct watchdog_device bcm2835_wdt_wdd = { */ static void bcm2835_power_off(void) { - struct device_node *np = - of_find_compatible_node(NULL, NULL, "brcm,bcm2835-pm-wdt"); - struct platform_device *pdev = of_find_device_by_node(np); - struct bcm2835_wdt *wdt = platform_get_drvdata(pdev); + struct bcm2835_wdt *wdt = bcm2835_power_off_wdt; u32 val; /* @@ -169,7 +169,7 @@ static void bcm2835_power_off(void) static int bcm2835_wdt_probe(struct platform_device *pdev) { - struct resource *res; + struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent); struct device *dev = &pdev->dev; struct bcm2835_wdt *wdt; int err; @@ -181,10 +181,7 @@ static int bcm2835_wdt_probe(struct platform_device *pdev) spin_lock_init(&wdt->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - wdt->base = devm_ioremap_resource(dev, res); - if (IS_ERR(wdt->base)) - return PTR_ERR(wdt->base); + wdt->base = pm->base; watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt); watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev); @@ -211,8 +208,10 @@ static int bcm2835_wdt_probe(struct platform_device *pdev) return err; } - if (pm_power_off == NULL) + if (pm_power_off == NULL) { pm_power_off = bcm2835_power_off; + bcm2835_power_off_wdt = wdt; + } dev_info(dev, "Broadcom BCM2835 watchdog timer"); return 0; @@ -226,18 +225,11 @@ static int bcm2835_wdt_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id bcm2835_wdt_of_match[] = { - { .compatible = "brcm,bcm2835-pm-wdt", }, - {}, -}; -MODULE_DEVICE_TABLE(of, bcm2835_wdt_of_match); - static struct platform_driver bcm2835_wdt_driver = { .probe = bcm2835_wdt_probe, .remove = bcm2835_wdt_remove, .driver = { .name = "bcm2835-wdt", - .of_match_table = bcm2835_wdt_of_match, }, }; module_platform_driver(bcm2835_wdt_driver); diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h new file mode 100644 index 000000000000..b7d0ee1feffa --- /dev/null +++ b/include/linux/mfd/bcm2835-pm.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef BCM2835_MFD_PM_H +#define BCM2835_MFD_PM_H + +#include + +struct bcm2835_pm { + struct device *dev; + void __iomem *base; +}; + +#endif /* BCM2835_MFD_PM_H */ -- cgit v1.2.3 From 670c672608a1ffcbc7ac0f872734843593bb8b15 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Dec 2018 15:51:48 -0800 Subject: soc: bcm: bcm2835-pm: Add support for power domains under a new binding. This provides a free software alternative to raspberrypi-power.c's firmware calls to manage power domains. It also exposes a reset line, where previously the vc4 driver had to try to force power off the domain in order to trigger a reset. Signed-off-by: Eric Anholt Acked-by: Rob Herring Acked-by: Stefan Wahren Signed-off-by: Stefan Wahren --- drivers/mfd/bcm2835-pm.c | 36 +- drivers/soc/bcm/Kconfig | 11 + drivers/soc/bcm/Makefile | 1 + drivers/soc/bcm/bcm2835-power.c | 661 +++++++++++++++++++++++++++++++++++ include/dt-bindings/soc/bcm2835-pm.h | 28 ++ include/linux/mfd/bcm2835-pm.h | 1 + 6 files changed, 734 insertions(+), 4 deletions(-) create mode 100644 drivers/soc/bcm/bcm2835-power.c create mode 100644 include/dt-bindings/soc/bcm2835-pm.h (limited to 'include/linux') diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c index 53839e6a81e7..42fe67f1538e 100644 --- a/drivers/mfd/bcm2835-pm.c +++ b/drivers/mfd/bcm2835-pm.c @@ -3,7 +3,7 @@ * PM MFD driver for Broadcom BCM2835 * * This driver binds to the PM block and creates the MFD device for - * the WDT driver. + * the WDT and power drivers. */ #include @@ -21,11 +21,16 @@ static const struct mfd_cell bcm2835_pm_devs[] = { { .name = "bcm2835-wdt" }, }; +static const struct mfd_cell bcm2835_power_devs[] = { + { .name = "bcm2835-power" }, +}; + static int bcm2835_pm_probe(struct platform_device *pdev) { struct resource *res; struct device *dev = &pdev->dev; struct bcm2835_pm *pm; + int ret; pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL); if (!pm) @@ -39,13 +44,36 @@ static int bcm2835_pm_probe(struct platform_device *pdev) if (IS_ERR(pm->base)) return PTR_ERR(pm->base); - return devm_mfd_add_devices(dev, -1, - bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), - NULL, 0, NULL); + ret = devm_mfd_add_devices(dev, -1, + bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs), + NULL, 0, NULL); + if (ret) + return ret; + + /* We'll use the presence of the AXI ASB regs in the + * bcm2835-pm binding as the key for whether we can reference + * the full PM register range and support power domains. + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res) { + pm->asb = devm_ioremap_resource(dev, res); + if (IS_ERR(pm->asb)) + return PTR_ERR(pm->asb); + + ret = devm_mfd_add_devices(dev, -1, + bcm2835_power_devs, + ARRAY_SIZE(bcm2835_power_devs), + NULL, 0, NULL); + if (ret) + return ret; + } + + return 0; } static const struct of_device_id bcm2835_pm_of_match[] = { { .compatible = "brcm,bcm2835-pm-wdt", }, + { .compatible = "brcm,bcm2835-pm", }, {}, }; MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match); diff --git a/drivers/soc/bcm/Kconfig b/drivers/soc/bcm/Kconfig index 055a845ed979..fe1af29560e9 100644 --- a/drivers/soc/bcm/Kconfig +++ b/drivers/soc/bcm/Kconfig @@ -1,5 +1,16 @@ menu "Broadcom SoC drivers" +config BCM2835_POWER + bool "BCM2835 power domain driver" + depends on ARCH_BCM2835 || (COMPILE_TEST && OF) + select PM_GENERIC_DOMAINS if PM + select RESET_CONTROLLER + help + This enables support for the BCM2835 power domains and reset + controller. Any usage of power domains by the Raspberry Pi + firmware means that Linux usage of the same power domain + must be accessed using the RASPBERRYPI_POWER driver + config RASPBERRYPI_POWER bool "Raspberry Pi power domain driver" depends on ARCH_BCM2835 || (COMPILE_TEST && OF) diff --git a/drivers/soc/bcm/Makefile b/drivers/soc/bcm/Makefile index dc4fced72d21..c81df4b2403c 100644 --- a/drivers/soc/bcm/Makefile +++ b/drivers/soc/bcm/Makefile @@ -1,2 +1,3 @@ +obj-$(CONFIG_BCM2835_POWER) += bcm2835-power.o obj-$(CONFIG_RASPBERRYPI_POWER) += raspberrypi-power.o obj-$(CONFIG_SOC_BRCMSTB) += brcmstb/ diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c new file mode 100644 index 000000000000..48412957ec7a --- /dev/null +++ b/drivers/soc/bcm/bcm2835-power.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Power domain driver for Broadcom BCM2835 + * + * Copyright (C) 2018 Broadcom + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PM_GNRIC 0x00 +#define PM_AUDIO 0x04 +#define PM_STATUS 0x18 +#define PM_RSTC 0x1c +#define PM_RSTS 0x20 +#define PM_WDOG 0x24 +#define PM_PADS0 0x28 +#define PM_PADS2 0x2c +#define PM_PADS3 0x30 +#define PM_PADS4 0x34 +#define PM_PADS5 0x38 +#define PM_PADS6 0x3c +#define PM_CAM0 0x44 +#define PM_CAM0_LDOHPEN BIT(2) +#define PM_CAM0_LDOLPEN BIT(1) +#define PM_CAM0_CTRLEN BIT(0) + +#define PM_CAM1 0x48 +#define PM_CAM1_LDOHPEN BIT(2) +#define PM_CAM1_LDOLPEN BIT(1) +#define PM_CAM1_CTRLEN BIT(0) + +#define PM_CCP2TX 0x4c +#define PM_CCP2TX_LDOEN BIT(1) +#define PM_CCP2TX_CTRLEN BIT(0) + +#define PM_DSI0 0x50 +#define PM_DSI0_LDOHPEN BIT(2) +#define PM_DSI0_LDOLPEN BIT(1) +#define PM_DSI0_CTRLEN BIT(0) + +#define PM_DSI1 0x54 +#define PM_DSI1_LDOHPEN BIT(2) +#define PM_DSI1_LDOLPEN BIT(1) +#define PM_DSI1_CTRLEN BIT(0) + +#define PM_HDMI 0x58 +#define PM_HDMI_RSTDR BIT(19) +#define PM_HDMI_LDOPD BIT(1) +#define PM_HDMI_CTRLEN BIT(0) + +#define PM_USB 0x5c +/* The power gates must be enabled with this bit before enabling the LDO in the + * USB block. + */ +#define PM_USB_CTRLEN BIT(0) + +#define PM_PXLDO 0x60 +#define PM_PXBG 0x64 +#define PM_DFT 0x68 +#define PM_SMPS 0x6c +#define PM_XOSC 0x70 +#define PM_SPAREW 0x74 +#define PM_SPARER 0x78 +#define PM_AVS_RSTDR 0x7c +#define PM_AVS_STAT 0x80 +#define PM_AVS_EVENT 0x84 +#define PM_AVS_INTEN 0x88 +#define PM_DUMMY 0xfc + +#define PM_IMAGE 0x108 +#define PM_GRAFX 0x10c +#define PM_PROC 0x110 +#define PM_ENAB BIT(12) +#define PM_ISPRSTN BIT(8) +#define PM_H264RSTN BIT(7) +#define PM_PERIRSTN BIT(6) +#define PM_V3DRSTN BIT(6) +#define PM_ISFUNC BIT(5) +#define PM_MRDONE BIT(4) +#define PM_MEMREP BIT(3) +#define PM_ISPOW BIT(2) +#define PM_POWOK BIT(1) +#define PM_POWUP BIT(0) +#define PM_INRUSH_SHIFT 13 +#define PM_INRUSH_3_5_MA 0 +#define PM_INRUSH_5_MA 1 +#define PM_INRUSH_10_MA 2 +#define PM_INRUSH_20_MA 3 +#define PM_INRUSH_MASK (3 << PM_INRUSH_SHIFT) + +#define PM_PASSWORD 0x5a000000 + +#define PM_WDOG_TIME_SET 0x000fffff +#define PM_RSTC_WRCFG_CLR 0xffffffcf +#define PM_RSTS_HADWRH_SET 0x00000040 +#define PM_RSTC_WRCFG_SET 0x00000030 +#define PM_RSTC_WRCFG_FULL_RESET 0x00000020 +#define PM_RSTC_RESET 0x00000102 + +#define PM_READ(reg) readl(power->base + (reg)) +#define PM_WRITE(reg, val) writel(PM_PASSWORD | (val), power->base + (reg)) + +#define ASB_BRDG_VERSION 0x00 +#define ASB_CPR_CTRL 0x04 + +#define ASB_V3D_S_CTRL 0x08 +#define ASB_V3D_M_CTRL 0x0c +#define ASB_ISP_S_CTRL 0x10 +#define ASB_ISP_M_CTRL 0x14 +#define ASB_H264_S_CTRL 0x18 +#define ASB_H264_M_CTRL 0x1c + +#define ASB_REQ_STOP BIT(0) +#define ASB_ACK BIT(1) +#define ASB_EMPTY BIT(2) +#define ASB_FULL BIT(3) + +#define ASB_AXI_BRDG_ID 0x20 + +#define ASB_READ(reg) readl(power->asb + (reg)) +#define ASB_WRITE(reg, val) writel(PM_PASSWORD | (val), power->asb + (reg)) + +struct bcm2835_power_domain { + struct generic_pm_domain base; + struct bcm2835_power *power; + u32 domain; + struct clk *clk; +}; + +struct bcm2835_power { + struct device *dev; + /* PM registers. */ + void __iomem *base; + /* AXI Async bridge registers. */ + void __iomem *asb; + + struct genpd_onecell_data pd_xlate; + struct bcm2835_power_domain domains[BCM2835_POWER_DOMAIN_COUNT]; + struct reset_controller_dev reset; +}; + +static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg) +{ + u64 start = ktime_get_ns(); + + /* Enable the module's async AXI bridges. */ + ASB_WRITE(reg, ASB_READ(reg) & ~ASB_REQ_STOP); + while (ASB_READ(reg) & ASB_ACK) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) + return -ETIMEDOUT; + } + + return 0; +} + +static int bcm2835_asb_disable(struct bcm2835_power *power, u32 reg) +{ + u64 start = ktime_get_ns(); + + /* Enable the module's async AXI bridges. */ + ASB_WRITE(reg, ASB_READ(reg) | ASB_REQ_STOP); + while (!(ASB_READ(reg) & ASB_ACK)) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) + return -ETIMEDOUT; + } + + return 0; +} + +static int bcm2835_power_power_off(struct bcm2835_power_domain *pd, u32 pm_reg) +{ + struct bcm2835_power *power = pd->power; + + /* Enable functional isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISFUNC); + + /* Enable electrical isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW); + + /* Open the power switches. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_POWUP); + + return 0; +} + +static int bcm2835_power_power_on(struct bcm2835_power_domain *pd, u32 pm_reg) +{ + struct bcm2835_power *power = pd->power; + struct device *dev = power->dev; + u64 start; + int ret; + int inrush; + bool powok; + + /* If it was already powered on by the fw, leave it that way. */ + if (PM_READ(pm_reg) & PM_POWUP) + return 0; + + /* Enable power. Allowing too much current at once may result + * in POWOK never getting set, so start low and ramp it up as + * necessary to succeed. + */ + powok = false; + for (inrush = PM_INRUSH_3_5_MA; inrush <= PM_INRUSH_20_MA; inrush++) { + PM_WRITE(pm_reg, + (PM_READ(pm_reg) & ~PM_INRUSH_MASK) | + (inrush << PM_INRUSH_SHIFT) | + PM_POWUP); + + start = ktime_get_ns(); + while (!(powok = !!(PM_READ(pm_reg) & PM_POWOK))) { + cpu_relax(); + if (ktime_get_ns() - start >= 3000) + break; + } + } + if (!powok) { + dev_err(dev, "Timeout waiting for %s power OK\n", + pd->base.name); + ret = -ETIMEDOUT; + goto err_disable_powup; + } + + /* Disable electrical isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISPOW); + + /* Repair memory */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_MEMREP); + start = ktime_get_ns(); + while (!(PM_READ(pm_reg) & PM_MRDONE)) { + cpu_relax(); + if (ktime_get_ns() - start >= 1000) { + dev_err(dev, "Timeout waiting for %s memory repair\n", + pd->base.name); + ret = -ETIMEDOUT; + goto err_disable_ispow; + } + } + + /* Disable functional isolation */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISFUNC); + + return 0; + +err_disable_ispow: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW); +err_disable_powup: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~(PM_POWUP | PM_INRUSH_MASK)); + return ret; +} + +static int bcm2835_asb_power_on(struct bcm2835_power_domain *pd, + u32 pm_reg, + u32 asb_m_reg, + u32 asb_s_reg, + u32 reset_flags) +{ + struct bcm2835_power *power = pd->power; + int ret; + + ret = clk_prepare_enable(pd->clk); + if (ret) { + dev_err(power->dev, "Failed to enable clock for %s\n", + pd->base.name); + return ret; + } + + /* Wait 32 clocks for reset to propagate, 1 us will be enough */ + udelay(1); + + clk_disable_unprepare(pd->clk); + + /* Deassert the resets. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) | reset_flags); + + ret = clk_prepare_enable(pd->clk); + if (ret) { + dev_err(power->dev, "Failed to enable clock for %s\n", + pd->base.name); + goto err_enable_resets; + } + + ret = bcm2835_asb_enable(power, asb_m_reg); + if (ret) { + dev_err(power->dev, "Failed to enable ASB master for %s\n", + pd->base.name); + goto err_disable_clk; + } + ret = bcm2835_asb_enable(power, asb_s_reg); + if (ret) { + dev_err(power->dev, "Failed to enable ASB slave for %s\n", + pd->base.name); + goto err_disable_asb_master; + } + + return 0; + +err_disable_asb_master: + bcm2835_asb_disable(power, asb_m_reg); +err_disable_clk: + clk_disable_unprepare(pd->clk); +err_enable_resets: + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags); + return ret; +} + +static int bcm2835_asb_power_off(struct bcm2835_power_domain *pd, + u32 pm_reg, + u32 asb_m_reg, + u32 asb_s_reg, + u32 reset_flags) +{ + struct bcm2835_power *power = pd->power; + int ret; + + ret = bcm2835_asb_disable(power, asb_s_reg); + if (ret) { + dev_warn(power->dev, "Failed to disable ASB slave for %s\n", + pd->base.name); + return ret; + } + ret = bcm2835_asb_disable(power, asb_m_reg); + if (ret) { + dev_warn(power->dev, "Failed to disable ASB master for %s\n", + pd->base.name); + bcm2835_asb_enable(power, asb_s_reg); + return ret; + } + + clk_disable_unprepare(pd->clk); + + /* Assert the resets. */ + PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags); + + return 0; +} + +static int bcm2835_power_pd_power_on(struct generic_pm_domain *domain) +{ + struct bcm2835_power_domain *pd = + container_of(domain, struct bcm2835_power_domain, base); + struct bcm2835_power *power = pd->power; + + switch (pd->domain) { + case BCM2835_POWER_DOMAIN_GRAFX: + return bcm2835_power_power_on(pd, PM_GRAFX); + + case BCM2835_POWER_DOMAIN_GRAFX_V3D: + return bcm2835_asb_power_on(pd, PM_GRAFX, + ASB_V3D_M_CTRL, ASB_V3D_S_CTRL, + PM_V3DRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE: + return bcm2835_power_power_on(pd, PM_IMAGE); + + case BCM2835_POWER_DOMAIN_IMAGE_PERI: + return bcm2835_asb_power_on(pd, PM_IMAGE, + 0, 0, + PM_PERIRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_ISP: + return bcm2835_asb_power_on(pd, PM_IMAGE, + ASB_ISP_M_CTRL, ASB_ISP_S_CTRL, + PM_ISPRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_H264: + return bcm2835_asb_power_on(pd, PM_IMAGE, + ASB_H264_M_CTRL, ASB_H264_S_CTRL, + PM_H264RSTN); + + case BCM2835_POWER_DOMAIN_USB: + PM_WRITE(PM_USB, PM_USB_CTRLEN); + return 0; + + case BCM2835_POWER_DOMAIN_DSI0: + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN); + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN | PM_DSI0_LDOHPEN); + return 0; + + case BCM2835_POWER_DOMAIN_DSI1: + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN); + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN | PM_DSI1_LDOHPEN); + return 0; + + case BCM2835_POWER_DOMAIN_CCP2TX: + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN); + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN | PM_CCP2TX_LDOEN); + return 0; + + case BCM2835_POWER_DOMAIN_HDMI: + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_RSTDR); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_CTRLEN); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_LDOPD); + usleep_range(100, 200); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_RSTDR); + return 0; + + default: + dev_err(power->dev, "Invalid domain %d\n", pd->domain); + return -EINVAL; + } +} + +static int bcm2835_power_pd_power_off(struct generic_pm_domain *domain) +{ + struct bcm2835_power_domain *pd = + container_of(domain, struct bcm2835_power_domain, base); + struct bcm2835_power *power = pd->power; + + switch (pd->domain) { + case BCM2835_POWER_DOMAIN_GRAFX: + return bcm2835_power_power_off(pd, PM_GRAFX); + + case BCM2835_POWER_DOMAIN_GRAFX_V3D: + return bcm2835_asb_power_off(pd, PM_GRAFX, + ASB_V3D_M_CTRL, ASB_V3D_S_CTRL, + PM_V3DRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE: + return bcm2835_power_power_off(pd, PM_IMAGE); + + case BCM2835_POWER_DOMAIN_IMAGE_PERI: + return bcm2835_asb_power_off(pd, PM_IMAGE, + 0, 0, + PM_PERIRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_ISP: + return bcm2835_asb_power_off(pd, PM_IMAGE, + ASB_ISP_M_CTRL, ASB_ISP_S_CTRL, + PM_ISPRSTN); + + case BCM2835_POWER_DOMAIN_IMAGE_H264: + return bcm2835_asb_power_off(pd, PM_IMAGE, + ASB_H264_M_CTRL, ASB_H264_S_CTRL, + PM_H264RSTN); + + case BCM2835_POWER_DOMAIN_USB: + PM_WRITE(PM_USB, 0); + return 0; + + case BCM2835_POWER_DOMAIN_DSI0: + PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN); + PM_WRITE(PM_DSI0, 0); + return 0; + + case BCM2835_POWER_DOMAIN_DSI1: + PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN); + PM_WRITE(PM_DSI1, 0); + return 0; + + case BCM2835_POWER_DOMAIN_CCP2TX: + PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN); + PM_WRITE(PM_CCP2TX, 0); + return 0; + + case BCM2835_POWER_DOMAIN_HDMI: + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_LDOPD); + PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_CTRLEN); + return 0; + + default: + dev_err(power->dev, "Invalid domain %d\n", pd->domain); + return -EINVAL; + } +} + +static void +bcm2835_init_power_domain(struct bcm2835_power *power, + int pd_xlate_index, const char *name) +{ + struct device *dev = power->dev; + struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index]; + + dom->clk = devm_clk_get(dev->parent, name); + + dom->base.name = name; + dom->base.power_on = bcm2835_power_pd_power_on; + dom->base.power_off = bcm2835_power_pd_power_off; + + dom->domain = pd_xlate_index; + dom->power = power; + + /* XXX: on/off at boot? */ + pm_genpd_init(&dom->base, NULL, true); + + power->pd_xlate.domains[pd_xlate_index] = &dom->base; +} + +/** bcm2835_reset_reset - Resets a block that has a reset line in the + * PM block. + * + * The consumer of the reset controller must have the power domain up + * -- there's no reset ability with the power domain down. To reset + * the sub-block, we just disable its access to memory through the + * ASB, reset, and re-enable. + */ +static int bcm2835_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power, + reset); + struct bcm2835_power_domain *pd; + int ret; + + switch (id) { + case BCM2835_RESET_V3D: + pd = &power->domains[BCM2835_POWER_DOMAIN_GRAFX_V3D]; + break; + case BCM2835_RESET_H264: + pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_H264]; + break; + case BCM2835_RESET_ISP: + pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_ISP]; + break; + default: + dev_err(power->dev, "Bad reset id %ld\n", id); + return -EINVAL; + } + + ret = bcm2835_power_pd_power_off(&pd->base); + if (ret) + return ret; + + return bcm2835_power_pd_power_on(&pd->base); +} + +static int bcm2835_reset_status(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power, + reset); + + switch (id) { + case BCM2835_RESET_V3D: + return !PM_READ(PM_GRAFX & PM_V3DRSTN); + case BCM2835_RESET_H264: + return !PM_READ(PM_IMAGE & PM_H264RSTN); + case BCM2835_RESET_ISP: + return !PM_READ(PM_IMAGE & PM_ISPRSTN); + default: + return -EINVAL; + } +} + +const struct reset_control_ops bcm2835_reset_ops = { + .reset = bcm2835_reset_reset, + .status = bcm2835_reset_status, +}; + +static const char *const power_domain_names[] = { + [BCM2835_POWER_DOMAIN_GRAFX] = "grafx", + [BCM2835_POWER_DOMAIN_GRAFX_V3D] = "v3d", + + [BCM2835_POWER_DOMAIN_IMAGE] = "image", + [BCM2835_POWER_DOMAIN_IMAGE_PERI] = "peri_image", + [BCM2835_POWER_DOMAIN_IMAGE_H264] = "h264", + [BCM2835_POWER_DOMAIN_IMAGE_ISP] = "isp", + + [BCM2835_POWER_DOMAIN_USB] = "usb", + [BCM2835_POWER_DOMAIN_DSI0] = "dsi0", + [BCM2835_POWER_DOMAIN_DSI1] = "dsi1", + [BCM2835_POWER_DOMAIN_CAM0] = "cam0", + [BCM2835_POWER_DOMAIN_CAM1] = "cam1", + [BCM2835_POWER_DOMAIN_CCP2TX] = "ccp2tx", + [BCM2835_POWER_DOMAIN_HDMI] = "hdmi", +}; + +static int bcm2835_power_probe(struct platform_device *pdev) +{ + struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct bcm2835_power *power; + static const struct { + int parent, child; + } domain_deps[] = { + { BCM2835_POWER_DOMAIN_GRAFX, BCM2835_POWER_DOMAIN_GRAFX_V3D }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_PERI }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_H264 }, + { BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_ISP }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_USB }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM0 }, + { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM1 }, + }; + int ret, i; + u32 id; + + power = devm_kzalloc(dev, sizeof(*power), GFP_KERNEL); + if (!power) + return -ENOMEM; + platform_set_drvdata(pdev, power); + + power->dev = dev; + power->base = pm->base; + power->asb = pm->asb; + + id = ASB_READ(ASB_AXI_BRDG_ID); + if (id != 0x62726467 /* "BRDG" */) { + dev_err(dev, "ASB register ID returned 0x%08x\n", id); + return -ENODEV; + } + + power->pd_xlate.domains = devm_kcalloc(dev, + ARRAY_SIZE(power_domain_names), + sizeof(*power->pd_xlate.domains), + GFP_KERNEL); + if (!power->pd_xlate.domains) + return -ENOMEM; + + power->pd_xlate.num_domains = ARRAY_SIZE(power_domain_names); + + for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) + bcm2835_init_power_domain(power, i, power_domain_names[i]); + + for (i = 0; i < ARRAY_SIZE(domain_deps); i++) { + pm_genpd_add_subdomain(&power->domains[domain_deps[i].parent].base, + &power->domains[domain_deps[i].child].base); + } + + power->reset.owner = THIS_MODULE; + power->reset.nr_resets = BCM2835_RESET_COUNT; + power->reset.ops = &bcm2835_reset_ops; + power->reset.of_node = dev->parent->of_node; + + ret = devm_reset_controller_register(dev, &power->reset); + if (ret) + return ret; + + of_genpd_add_provider_onecell(dev->parent->of_node, &power->pd_xlate); + + dev_info(dev, "Broadcom BCM2835 power domains driver"); + return 0; +} + +static int bcm2835_power_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver bcm2835_power_driver = { + .probe = bcm2835_power_probe, + .remove = bcm2835_power_remove, + .driver = { + .name = "bcm2835-power", + }, +}; +module_platform_driver(bcm2835_power_driver); + +MODULE_AUTHOR("Eric Anholt "); +MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM power domains and reset"); +MODULE_LICENSE("GPL"); diff --git a/include/dt-bindings/soc/bcm2835-pm.h b/include/dt-bindings/soc/bcm2835-pm.h new file mode 100644 index 000000000000..153d75b8d99f --- /dev/null +++ b/include/dt-bindings/soc/bcm2835-pm.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ + +#ifndef _DT_BINDINGS_ARM_BCM2835_PM_H +#define _DT_BINDINGS_ARM_BCM2835_PM_H + +#define BCM2835_POWER_DOMAIN_GRAFX 0 +#define BCM2835_POWER_DOMAIN_GRAFX_V3D 1 +#define BCM2835_POWER_DOMAIN_IMAGE 2 +#define BCM2835_POWER_DOMAIN_IMAGE_PERI 3 +#define BCM2835_POWER_DOMAIN_IMAGE_ISP 4 +#define BCM2835_POWER_DOMAIN_IMAGE_H264 5 +#define BCM2835_POWER_DOMAIN_USB 6 +#define BCM2835_POWER_DOMAIN_DSI0 7 +#define BCM2835_POWER_DOMAIN_DSI1 8 +#define BCM2835_POWER_DOMAIN_CAM0 9 +#define BCM2835_POWER_DOMAIN_CAM1 10 +#define BCM2835_POWER_DOMAIN_CCP2TX 11 +#define BCM2835_POWER_DOMAIN_HDMI 12 + +#define BCM2835_POWER_DOMAIN_COUNT 13 + +#define BCM2835_RESET_V3D 0 +#define BCM2835_RESET_ISP 1 +#define BCM2835_RESET_H264 2 + +#define BCM2835_RESET_COUNT 3 + +#endif /* _DT_BINDINGS_ARM_BCM2835_PM_H */ diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h index b7d0ee1feffa..ed37dc40e82a 100644 --- a/include/linux/mfd/bcm2835-pm.h +++ b/include/linux/mfd/bcm2835-pm.h @@ -8,6 +8,7 @@ struct bcm2835_pm { struct device *dev; void __iomem *base; + void __iomem *asb; }; #endif /* BCM2835_MFD_PM_H */ -- cgit v1.2.3 From 03c87b95ac04c2a34045641b25dded6e3e889556 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Jan 2019 18:44:00 +0100 Subject: regulator: provide rdev_get_regmap() Provide a helper allowing to access regulator's regmap. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 ++++++ include/linux/regulator/driver.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 3f9d81b6e763..430a73dea487 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5251,6 +5251,12 @@ struct device *rdev_get_dev(struct regulator_dev *rdev) } EXPORT_SYMBOL_GPL(rdev_get_dev); +struct regmap *rdev_get_regmap(struct regulator_dev *rdev) +{ + return rdev->regmap; +} +EXPORT_SYMBOL_GPL(rdev_get_regmap); + void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data) { return reg_init_data->driver_data; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 389bcaf7900f..795b38a06b6c 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -503,6 +503,7 @@ int regulator_notifier_call_chain(struct regulator_dev *rdev, void *rdev_get_drvdata(struct regulator_dev *rdev); struct device *rdev_get_dev(struct regulator_dev *rdev); +struct regmap *rdev_get_regmap(struct regulator_dev *rdev); int rdev_get_id(struct regulator_dev *rdev); int regulator_mode_to_status(unsigned int); -- cgit v1.2.3 From 412e60373245fd1dfae8d4d44c5d1406b3d90971 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Tue, 8 Jan 2019 12:13:45 +0000 Subject: spi: core: avoid waking pump thread from spi_sync instead run teardown delayed When spi_sync is running alone with no other spi devices connected to the bus the worker thread is woken during spi_finalize_current_message to run the teardown code every time. This is totally unnecessary in the case that there is no message queued. On a multi-core system this results in one wakeup of the thread for each spi_message processed via spi_sync where in most cases the teardown does not happen as the hw is already in use. This patch now delays the teardown by 1 second by using a separate kthread_delayed_work for the teardown. This avoids waking the kthread too often. For spi_sync transfers in a tight loop (say 40k messages/s) this avoids the penalty of waking the worker thread 40k times/s. On a rasperry pi 3 with 4 cores the results in 32% of a single core only to find out that there is nothing in the queue and it can go back to sleep. With this patch applied the spi-worker is woken exactly once: after the load finishes and the spi bus is idle for 1 second. I believe I have also seen situations where during a spi_sync loop the worker thread (triggered by the last message finished) is slightly faster and _wins_ the race to process the message, so we are actually running the kthread and letting it do some work... This is also no longer observed with this patch applied as. Tested with a new CAN controller driver for the mcp2517fd which uses spi_sync for interrupt handling and spi_async for scheduling of can frames for transmission (in a different thread) Some statistics when receiving 100000 CAN frames with the mcp25xxfd driver on a Raspberry pi 3: without the patch: ------------------ root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done (spi0) 5 (irq/94-mcp25xxf) 0 root@raspcm3:~# vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 1 0 0 821960 13592 50848 0 0 80 2 1986 105 1 2 97 0 0 0 0 0 821968 13592 50876 0 0 0 0 8046 30 0 0 100 0 0 0 0 0 821936 13592 50876 0 0 0 0 8032 24 0 0 100 0 0 0 0 0 821936 13592 50876 0 0 0 0 8035 30 0 0 100 0 0 0 0 0 821936 13592 50876 0 0 0 0 8033 22 0 0 100 0 0 2 0 0 821936 13592 50876 0 0 0 0 11598 7129 0 3 97 0 0 1 0 0 821872 13592 50876 0 0 0 0 37741 59003 0 31 69 0 0 2 0 0 821840 13592 50876 0 0 0 0 37762 59078 0 29 71 0 0 2 0 0 821776 13592 50876 0 0 0 0 37593 58792 0 28 72 0 0 1 0 0 821744 13592 50876 0 0 0 0 37642 58881 0 30 70 0 0 2 0 0 821680 13592 50876 0 0 0 0 37490 58602 0 27 73 0 0 1 0 0 821648 13592 50876 0 0 0 0 37412 58418 0 29 71 0 0 1 0 0 821584 13592 50876 0 0 0 0 37337 58288 0 27 73 0 0 1 0 0 821552 13592 50876 0 0 0 0 37584 58774 0 27 73 0 0 0 0 0 821520 13592 50876 0 0 0 0 18363 20566 0 9 91 0 0 0 0 0 821520 13592 50876 0 0 0 0 8037 32 0 0 100 0 0 0 0 0 821520 13592 50876 0 0 0 0 8031 23 0 0 100 0 0 0 0 0 821520 13592 50876 0 0 0 0 8034 26 0 0 100 0 0 0 0 0 821520 13592 50876 0 0 0 0 8033 24 0 0 100 0 0 ^C root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done (spi0) 228 (irq/94-mcp25xxf) 794 root@raspcm3:~# cat /proc/interrupts CPU0 CPU1 CPU2 CPU3 17: 34 0 0 0 ARMCTRL-level 1 Edge 3f00b880.mailbox 27: 1 0 0 0 ARMCTRL-level 35 Edge timer 33: 1416870 0 0 0 ARMCTRL-level 41 Edge 3f980000.usb, dwc2_hsotg:usb1 34: 1 0 0 0 ARMCTRL-level 42 Edge vc4 35: 0 0 0 0 ARMCTRL-level 43 Edge 3f004000.txp 40: 1753 0 0 0 ARMCTRL-level 48 Edge DMA IRQ 42: 11 0 0 0 ARMCTRL-level 50 Edge DMA IRQ 44: 11 0 0 0 ARMCTRL-level 52 Edge DMA IRQ 45: 0 0 0 0 ARMCTRL-level 53 Edge DMA IRQ 66: 0 0 0 0 ARMCTRL-level 74 Edge vc4 crtc 69: 0 0 0 0 ARMCTRL-level 77 Edge vc4 crtc 70: 0 0 0 0 ARMCTRL-level 78 Edge vc4 crtc 77: 20 0 0 0 ARMCTRL-level 85 Edge 3f205000.i2c, 3f804000.i2c, 3f805000.i2c 78: 6346 0 0 0 ARMCTRL-level 86 Edge 3f204000.spi 80: 205 0 0 0 ARMCTRL-level 88 Edge mmc0 81: 493 0 0 0 ARMCTRL-level 89 Edge uart-pl011 89: 0 0 0 0 bcm2836-timer 0 Edge arch_timer 90: 4291 3821 2180 1649 bcm2836-timer 1 Edge arch_timer 94: 14289 0 0 0 pinctrl-bcm2835 16 Level mcp25xxfd IPI0: 0 0 0 0 CPU wakeup interrupts IPI1: 0 0 0 0 Timer broadcast interrupts IPI2: 3645 242371 7919 1328 Rescheduling interrupts IPI3: 112 543 273 194 Function call interrupts IPI4: 0 0 0 0 CPU stop interrupts IPI5: 1 0 0 0 IRQ work interrupts IPI6: 0 0 0 0 completion interrupts Err: 0 top shows 93% for the mcp25xxfd interrupt handler, 31% for spi0. with the patch: --------------- root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done (spi0) 0 (irq/94-mcp25xxf) 0 root@raspcm3:~# vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- 0 0 0 804768 13584 62628 0 0 0 0 8038 24 0 0 100 0 0 0 0 0 804768 13584 62628 0 0 0 0 8042 25 0 0 100 0 0 1 0 0 804704 13584 62628 0 0 0 0 9603 2967 0 20 80 0 0 1 0 0 804672 13584 62628 0 0 0 0 9828 3380 0 24 76 0 0 1 0 0 804608 13584 62628 0 0 0 0 9823 3375 0 23 77 0 0 1 0 0 804608 13584 62628 0 0 0 12 9829 3394 0 23 77 0 0 1 0 0 804544 13584 62628 0 0 0 0 9816 3362 0 22 78 0 0 1 0 0 804512 13584 62628 0 0 0 0 9817 3367 0 23 77 0 0 1 0 0 804448 13584 62628 0 0 0 0 9822 3370 0 22 78 0 0 1 0 0 804416 13584 62628 0 0 0 0 9815 3367 0 23 77 0 0 0 0 0 804352 13584 62628 0 0 0 84 9222 2250 0 14 86 0 0 0 0 0 804352 13592 62620 0 0 0 24 8131 209 0 0 93 7 0 0 0 0 804320 13592 62628 0 0 0 0 8041 27 0 0 100 0 0 0 0 0 804352 13592 62628 0 0 0 0 8040 26 0 0 100 0 0 root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done (spi0) 0 (irq/94-mcp25xxf) 767 root@raspcm3:~# cat /proc/interrupts CPU0 CPU1 CPU2 CPU3 17: 29 0 0 0 ARMCTRL-level 1 Edge 3f00b880.mailbox 27: 1 0 0 0 ARMCTRL-level 35 Edge timer 33: 1024412 0 0 0 ARMCTRL-level 41 Edge 3f980000.usb, dwc2_hsotg:usb1 34: 1 0 0 0 ARMCTRL-level 42 Edge vc4 35: 0 0 0 0 ARMCTRL-level 43 Edge 3f004000.txp 40: 1773 0 0 0 ARMCTRL-level 48 Edge DMA IRQ 42: 11 0 0 0 ARMCTRL-level 50 Edge DMA IRQ 44: 11 0 0 0 ARMCTRL-level 52 Edge DMA IRQ 45: 0 0 0 0 ARMCTRL-level 53 Edge DMA IRQ 66: 0 0 0 0 ARMCTRL-level 74 Edge vc4 crtc 69: 0 0 0 0 ARMCTRL-level 77 Edge vc4 crtc 70: 0 0 0 0 ARMCTRL-level 78 Edge vc4 crtc 77: 20 0 0 0 ARMCTRL-level 85 Edge 3f205000.i2c, 3f804000.i2c, 3f805000.i2c 78: 6417 0 0 0 ARMCTRL-level 86 Edge 3f204000.spi 80: 237 0 0 0 ARMCTRL-level 88 Edge mmc0 81: 489 0 0 0 ARMCTRL-level 89 Edge uart-pl011 89: 0 0 0 0 bcm2836-timer 0 Edge arch_timer 90: 4048 3704 2383 1892 bcm2836-timer 1 Edge arch_timer 94: 14287 0 0 0 pinctrl-bcm2835 16 Level mcp25xxfd IPI0: 0 0 0 0 CPU wakeup interrupts IPI1: 0 0 0 0 Timer broadcast interrupts IPI2: 2361 2948 7890 1616 Rescheduling interrupts IPI3: 65 617 301 166 Function call interrupts IPI4: 0 0 0 0 CPU stop interrupts IPI5: 1 0 0 0 IRQ work interrupts IPI6: 0 0 0 0 completion interrupts Err: 0 top shows 91% for the mcp25xxfd interrupt handler, 0% for spi0 So we see that spi0 is no longer getting scheduled wasting CPU cycles There are a lot less context switches and corresponding Rescheduling interrupts All of these show that this improves efficiency of the system and reduces CPU utilization. Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 122 +++++++++++++++++++++++++++++++++++------------- include/linux/spi/spi.h | 2 + 2 files changed, 91 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 13f447a67d67..06b9139664a3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1225,7 +1225,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) return; } - /* If another context is idling the device then defer */ + /* If another context is idling the device then defer to kthread */ if (ctlr->idling) { kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); @@ -1239,34 +1239,10 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) return; } - /* Only do teardown in the thread */ - if (!in_kthread) { - kthread_queue_work(&ctlr->kworker, - &ctlr->pump_messages); - spin_unlock_irqrestore(&ctlr->queue_lock, flags); - return; - } - - ctlr->busy = false; - ctlr->idling = true; - spin_unlock_irqrestore(&ctlr->queue_lock, flags); - - kfree(ctlr->dummy_rx); - ctlr->dummy_rx = NULL; - kfree(ctlr->dummy_tx); - ctlr->dummy_tx = NULL; - if (ctlr->unprepare_transfer_hardware && - ctlr->unprepare_transfer_hardware(ctlr)) - dev_err(&ctlr->dev, - "failed to unprepare transfer hardware\n"); - if (ctlr->auto_runtime_pm) { - pm_runtime_mark_last_busy(ctlr->dev.parent); - pm_runtime_put_autosuspend(ctlr->dev.parent); - } - trace_spi_controller_idle(ctlr); - - spin_lock_irqsave(&ctlr->queue_lock, flags); - ctlr->idling = false; + /* schedule idle teardown with a delay of 1 second */ + kthread_mod_delayed_work(&ctlr->kworker, + &ctlr->pump_idle_teardown, + HZ); spin_unlock_irqrestore(&ctlr->queue_lock, flags); return; } @@ -1359,6 +1335,77 @@ static void spi_pump_messages(struct kthread_work *work) __spi_pump_messages(ctlr, true); } +/** + * spi_pump_idle_teardown - kthread delayed work function which tears down + * the controller settings after some delay + * @work: pointer to kthread work struct contained in the controller struct + */ +static void spi_pump_idle_teardown(struct kthread_work *work) +{ + struct spi_controller *ctlr = + container_of(work, struct spi_controller, + pump_idle_teardown.work); + unsigned long flags; + + /* Lock queue */ + spin_lock_irqsave(&ctlr->queue_lock, flags); + + /* Make sure we are not already running a message */ + if (ctlr->cur_msg) + goto out; + + /* if there is anything in the list then exit */ + if (!list_empty(&ctlr->queue)) + goto out; + + /* if the controller is running then exit */ + if (ctlr->running) + goto out; + + /* if the controller is busy then exit */ + if (ctlr->busy) + goto out; + + /* if the controller is idling then exit + * this is actually a bit strange and would indicate that + * this function is scheduled twice, which should not happen + */ + if (ctlr->idling) + goto out; + + /* set up the initial states */ + ctlr->busy = false; + ctlr->idling = true; + spin_unlock_irqrestore(&ctlr->queue_lock, flags); + + /* free dummy receive buffers */ + kfree(ctlr->dummy_rx); + ctlr->dummy_rx = NULL; + kfree(ctlr->dummy_tx); + ctlr->dummy_tx = NULL; + + /* unprepare hardware */ + if (ctlr->unprepare_transfer_hardware && + ctlr->unprepare_transfer_hardware(ctlr)) + dev_err(&ctlr->dev, + "failed to unprepare transfer hardware\n"); + /* handle pm */ + if (ctlr->auto_runtime_pm) { + pm_runtime_mark_last_busy(ctlr->dev.parent); + pm_runtime_put_autosuspend(ctlr->dev.parent); + } + + /* mark controller as idle */ + trace_spi_controller_idle(ctlr); + + /* finally put us from idling into stopped */ + spin_lock_irqsave(&ctlr->queue_lock, flags); + ctlr->idling = false; + +out: + spin_unlock_irqrestore(&ctlr->queue_lock, flags); +} + static int spi_init_queue(struct spi_controller *ctlr) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -1374,7 +1421,8 @@ static int spi_init_queue(struct spi_controller *ctlr) return PTR_ERR(ctlr->kworker_task); } kthread_init_work(&ctlr->pump_messages, spi_pump_messages); - + kthread_init_delayed_work(&ctlr->pump_idle_teardown, + spi_pump_idle_teardown); /* * Controller config will indicate if this controller should run the * message pump with high (realtime) priority to reduce the transfer @@ -1446,7 +1494,16 @@ void spi_finalize_current_message(struct spi_controller *ctlr) spin_lock_irqsave(&ctlr->queue_lock, flags); ctlr->cur_msg = NULL; ctlr->cur_msg_prepared = false; - kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); + + /* if there is something queued, then wake the queue */ + if (!list_empty(&ctlr->queue)) + kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); + else + /* otherwise schedule delayed teardown */ + kthread_mod_delayed_work(&ctlr->kworker, + &ctlr->pump_idle_teardown, + HZ); + spin_unlock_irqrestore(&ctlr->queue_lock, flags); trace_spi_message_done(mesg); @@ -1551,7 +1608,7 @@ static int __spi_queued_transfer(struct spi_device *spi, msg->status = -EINPROGRESS; list_add_tail(&msg->queue, &ctlr->queue); - if (!ctlr->busy && need_pump) + if (need_pump) kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); @@ -3726,4 +3783,3 @@ err0: * include needing to have boardinfo data structures be much more public. */ postcore_initcall(spi_init); - diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 916bba47d156..79ad62e2487c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -334,6 +334,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @kworker: thread struct for message pump * @kworker_task: pointer to task for message pump kworker thread * @pump_messages: work struct for scheduling work to the message pump + * @pump_idle_teardown: work structure for scheduling a teardown delayed * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue * @idling: the device is entering idle state @@ -532,6 +533,7 @@ struct spi_controller { struct kthread_worker kworker; struct task_struct *kworker_task; struct kthread_work pump_messages; + struct kthread_delayed_work pump_idle_teardown; spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; -- cgit v1.2.3 From 19e99de9a53f9ece6baf8e9a15428aedd4b20c86 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Jan 2019 10:15:36 +0100 Subject: ARM: davinci: remove dead code related to MAC address reading There are no more users of davinci_get_mac_addr(). Remove it. Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/common.c | 15 --------------- include/linux/davinci_emac.h | 1 - 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index e1d0f0d841ff..0c638fe15dcb 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -26,21 +26,6 @@ EXPORT_SYMBOL(davinci_soc_info); void __iomem *davinci_intc_base; int davinci_intc_type; -void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context) -{ - char *mac_addr = davinci_soc_info.emac_pdata->mac_addr; - off_t offset = (off_t)context; - - if (!IS_BUILTIN(CONFIG_NVMEM)) { - pr_warn("Cannot read MAC addr from EEPROM without CONFIG_NVMEM\n"); - return; - } - - /* Read MAC addr from EEPROM */ - if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) - pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); -} - static int __init davinci_init_id(struct davinci_soc_info *soc_info) { int i; diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h index 05b97144d342..28e6cf1356da 100644 --- a/include/linux/davinci_emac.h +++ b/include/linux/davinci_emac.h @@ -46,5 +46,4 @@ enum { EMAC_VERSION_2, /* DM646x */ }; -void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context); #endif -- cgit v1.2.3 From cc2d22477779f189595db5c515bd5ef9c75a1f35 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 7 Jan 2019 20:49:39 +0100 Subject: pwm: Drop per-chip dbg_show callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This callback was introduced in commit 62099abf67a2 ("pwm: Add debugfs interface") in 2012 and up to now there is not a single user. So drop this unused code. Signed-off-by: Uwe Kleine-König [thierry.reding@gmail.com: remove kerneldoc for ->dbg_show()] Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 5 +---- include/linux/pwm.h | 4 ---- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 253a459fe0d8..3149204567f3 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -1036,10 +1036,7 @@ static int pwm_seq_show(struct seq_file *s, void *v) dev_name(chip->dev), chip->npwm, (chip->npwm != 1) ? "s" : ""); - if (chip->ops->dbg_show) - chip->ops->dbg_show(chip, s); - else - pwm_dbg_show(chip, s); + pwm_dbg_show(chip, s); return 0; } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d5199b507d79..6a544cb89de4 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -254,7 +254,6 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @dbg_show: optional routine to show contents in debugfs * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { @@ -272,9 +271,6 @@ struct pwm_ops { struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); -#ifdef CONFIG_DEBUG_FS - void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s); -#endif struct module *owner; }; -- cgit v1.2.3 From 5d0a4c11896e8b83f816f135c24b184d4ba57741 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 7 Jan 2019 20:49:41 +0100 Subject: pwm: Rearrange structures to group members by purpose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pwm_ops there are a few callbacks that are not supposed to be used by new drivers. Group them at the end of the structure and add a comment. Similarily for struct pwm_chip group the members that drivers shouldn't care about at the end and mark them as internal with another comment. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 6a544cb89de4..b628abfffacc 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -242,11 +242,7 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM - * @config: configure duty cycles and period length for this PWM - * @set_polarity: configure the polarity of this PWM * @capture: capture and report PWM signal - * @enable: enable PWM output toggling - * @disable: disable PWM output toggling * @apply: atomically apply a new PWM config. The state argument * should be adjusted with the real hardware config (if the * approximate the period or duty_cycle value, state should @@ -255,48 +251,55 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * called once per PWM device when the PWM chip is * registered. * @owner: helps prevent removal of modules exporting active PWMs + * @config: configure duty cycles and period length for this PWM + * @set_polarity: configure the polarity of this PWM + * @enable: enable PWM output toggling + * @disable: disable PWM output toggling */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); - int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns); - int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, - enum pwm_polarity polarity); int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); - int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); - void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; + + /* Only used by legacy drivers */ + int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns); + int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity); + int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); + void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs - * @list: list node for internal use * @ops: callbacks for this PWM controller * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip - * @pwms: array of PWM devices allocated by the framework * @of_xlate: request a PWM device given a device tree PWM specifier * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier + * @list: list node for internal use + * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { struct device *dev; - struct list_head list; const struct pwm_ops *ops; int base; unsigned int npwm; - struct pwm_device *pwms; - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; + + /* only used internally by the PWM framework */ + struct list_head list; + struct pwm_device *pwms; }; /** -- cgit v1.2.3 From 5a1c18b761ddb299a06746948b9ec2814b04fa92 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Wed, 2 Jan 2019 00:00:01 +0100 Subject: bcma: keep a direct pointer to the struct device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing struct device is pretty useful/common so having a direct pointer: 1) Simplifies some code 2) Makes bcma_bus_get_host_dev() unneeded 3) Allows further improvements like using dev_* printing helpers Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 1 - drivers/bcma/driver_gpio.c | 2 +- drivers/bcma/host_pci.c | 2 ++ drivers/bcma/host_soc.c | 4 ++-- drivers/bcma/main.c | 45 ++++++++++----------------------------------- include/linux/bcma/bcma.h | 11 +++-------- 6 files changed, 18 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index a4aac370f21f..1f0e66310b23 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -33,7 +33,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus); int bcma_bus_suspend(struct bcma_bus *bus); int bcma_bus_resume(struct bcma_bus *bus); #endif -struct device *bcma_bus_get_host_dev(struct bcma_bus *bus); /* scan.c */ void bcma_detect_chip(struct bcma_bus *bus); diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 2c0ffb77d738..a5df3d111334 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -183,7 +183,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; chip->owner = THIS_MODULE; - chip->parent = bcma_bus_get_host_dev(bus); + chip->parent = bus->dev; #if IS_BUILTIN(CONFIG_OF) chip->of_node = cc->core->dev.of_node; #endif diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 63410ecfe640..f52239feb4cb 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -196,6 +196,8 @@ static int bcma_host_pci_probe(struct pci_dev *dev, goto err_pci_release_regions; } + bus->dev = &dev->dev; + /* Map MMIO */ err = -ENOMEM; bus->mmio = pci_iomap(dev, 0, ~0UL); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 2dce34789329..c8073b509a2b 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -179,7 +179,6 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Host specific */ bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; - bus->host_pdev = NULL; /* Initialize struct, detect chip */ bcma_init_bus(bus); @@ -213,6 +212,8 @@ static int bcma_host_soc_probe(struct platform_device *pdev) if (!bus) return -ENOMEM; + bus->dev = dev; + /* Map MMIO */ bus->mmio = of_iomap(np, 0); if (!bus->mmio) @@ -221,7 +222,6 @@ static int bcma_host_soc_probe(struct platform_device *pdev) /* Host specific */ bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; - bus->host_pdev = pdev; /* Initialize struct, detect chip */ bcma_init_bus(bus); diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index fc1f4acdd189..6535614a7dc1 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -223,8 +223,8 @@ unsigned int bcma_core_irq(struct bcma_device *core, int num) mips_irq = bcma_core_mips_irq(core); return mips_irq <= 4 ? mips_irq + 2 : 0; } - if (bus->host_pdev) - return bcma_of_get_irq(&bus->host_pdev->dev, core, num); + if (bus->dev) + return bcma_of_get_irq(bus->dev, core, num); return 0; case BCMA_HOSTTYPE_SDIO: return 0; @@ -239,18 +239,18 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) core->dev.release = bcma_release_core_dev; core->dev.bus = &bcma_bus_type; dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index); - core->dev.parent = bcma_bus_get_host_dev(bus); - if (core->dev.parent) - bcma_of_fill_device(core->dev.parent, core); + core->dev.parent = bus->dev; + if (bus->dev) + bcma_of_fill_device(bus->dev, core); switch (bus->hosttype) { case BCMA_HOSTTYPE_PCI: - core->dma_dev = &bus->host_pci->dev; + core->dma_dev = bus->dev; core->irq = bus->host_pci->irq; break; case BCMA_HOSTTYPE_SOC: - if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) { - core->dma_dev = &bus->host_pdev->dev; + if (IS_ENABLED(CONFIG_OF) && bus->dev) { + core->dma_dev = bus->dev; } else { core->dev.dma_mask = &core->dev.coherent_dma_mask; core->dma_dev = &core->dev; @@ -261,28 +261,6 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) } } -struct device *bcma_bus_get_host_dev(struct bcma_bus *bus) -{ - switch (bus->hosttype) { - case BCMA_HOSTTYPE_PCI: - if (bus->host_pci) - return &bus->host_pci->dev; - else - return NULL; - case BCMA_HOSTTYPE_SOC: - if (bus->host_pdev) - return &bus->host_pdev->dev; - else - return NULL; - case BCMA_HOSTTYPE_SDIO: - if (bus->host_sdio) - return &bus->host_sdio->dev; - else - return NULL; - } - return NULL; -} - void bcma_init_bus(struct bcma_bus *bus) { mutex_lock(&bcma_buses_mutex); @@ -402,7 +380,6 @@ int bcma_bus_register(struct bcma_bus *bus) { int err; struct bcma_device *core; - struct device *dev; /* Scan for devices (cores) */ err = bcma_bus_scan(bus); @@ -425,10 +402,8 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_pci_early_init(&bus->drv_pci[0]); } - dev = bcma_bus_get_host_dev(bus); - if (dev) { - of_platform_default_populate(dev->of_node, NULL, dev); - } + if (bus->dev) + of_platform_default_populate(bus->dev->of_node, NULL, bus->dev); /* Cores providing flash access go before SPROM init */ list_for_each_entry(core, &bus->cores, list) { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index ef61f3607e99..60b94b944e9f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,6 +332,8 @@ extern int bcma_arch_register_fallback_sprom( struct ssb_sprom *out)); struct bcma_bus { + struct device *dev; + /* The MMIO area. */ void __iomem *mmio; @@ -339,14 +341,7 @@ struct bcma_bus { enum bcma_hosttype hosttype; bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */ - union { - /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ - struct pci_dev *host_pci; - /* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */ - struct sdio_func *host_sdio; - /* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */ - struct platform_device *host_pdev; - }; + struct pci_dev *host_pci; /* PCI bus pointer (BCMA_HOSTTYPE_PCI only) */ struct bcma_chipinfo chipinfo; -- cgit v1.2.3 From c1a85a00ea66cb6f0bd0f14e47c28c2b0999799f Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Mon, 7 Jan 2019 16:10:53 -0800 Subject: LSM: generalize flag passing to security_capable This patch provides a general mechanism for passing flags to the security_capable LSM hook. It replaces the specific 'audit' flag that is used to tell security_capable whether it should log an audit message for the given capability check. The reason for generalizing this flag passing is so we can add an additional flag that signifies whether security_capable is being called by a setid syscall (which is needed by the proposed SafeSetID LSM). Signed-off-by: Micah Morton Reviewed-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 8 +++++--- include/linux/security.h | 28 ++++++++++++++-------------- kernel/capability.c | 22 +++++++++++++--------- kernel/seccomp.c | 4 ++-- security/apparmor/capability.c | 14 +++++++------- security/apparmor/include/capability.h | 2 +- security/apparmor/ipc.c | 3 ++- security/apparmor/lsm.c | 4 ++-- security/apparmor/resource.c | 2 +- security/commoncap.c | 17 +++++++++-------- security/security.c | 14 +++++--------- security/selinux/hooks.c | 18 +++++++++--------- security/smack/smack_access.c | 2 +- 13 files changed, 71 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 40511a8a5ae6..195707210975 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1270,7 +1270,7 @@ * @cred contains the credentials to use. * @ns contains the user namespace we want the capability in * @cap contains the capability . - * @audit contains whether to write an audit message or not + * @opts contains options for the capable check * Return 0 if the capability is granted for @tsk. * @syslog: * Check permission before accessing the kernel message ring or changing @@ -1446,8 +1446,10 @@ union security_list_options { const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); - int (*capable)(const struct cred *cred, struct user_namespace *ns, - int cap, int audit); + int (*capable)(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts); int (*quotactl)(int cmds, int type, int id, struct super_block *sb); int (*quota_on)(struct dentry *dentry); int (*syslog)(int type); diff --git a/include/linux/security.h b/include/linux/security.h index b2c5333ed4b5..13537a49ae97 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -54,9 +54,12 @@ struct xattr; struct xfrm_sec_ctx; struct mm_struct; +/* Default (no) options for the capable function */ +#define CAP_OPT_NONE 0x0 /* If capable should audit the security request */ -#define SECURITY_CAP_NOAUDIT 0 -#define SECURITY_CAP_AUDIT 1 +#define CAP_OPT_NOAUDIT BIT(1) +/* If capable is being called by a setid function */ +#define CAP_OPT_INSETID BIT(2) /* LSM Agnostic defines for sb_set_mnt_opts */ #define SECURITY_LSM_NATIVE_LABELS 1 @@ -72,7 +75,7 @@ enum lsm_event { /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit); + int cap, unsigned int opts); extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -207,10 +210,10 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -int security_capable(const struct cred *cred, struct user_namespace *ns, - int cap); -int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, - int cap); +int security_capable(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); @@ -464,14 +467,11 @@ static inline int security_capset(struct cred *new, } static inline int security_capable(const struct cred *cred, - struct user_namespace *ns, int cap) + struct user_namespace *ns, + int cap, + unsigned int opts) { - return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT); -} - -static inline int security_capable_noaudit(const struct cred *cred, - struct user_namespace *ns, int cap) { - return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT); + return cap_capable(cred, ns, cap, opts); } static inline int security_quotactl(int cmds, int type, int id, diff --git a/kernel/capability.c b/kernel/capability.c index 1e1c0236f55b..7718d7dcadc7 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -299,7 +299,7 @@ bool has_ns_capability(struct task_struct *t, int ret; rcu_read_lock(); - ret = security_capable(__task_cred(t), ns, cap); + ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NONE); rcu_read_unlock(); return (ret == 0); @@ -340,7 +340,7 @@ bool has_ns_capability_noaudit(struct task_struct *t, int ret; rcu_read_lock(); - ret = security_capable_noaudit(__task_cred(t), ns, cap); + ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); @@ -363,7 +363,9 @@ bool has_capability_noaudit(struct task_struct *t, int cap) return has_ns_capability_noaudit(t, &init_user_ns, cap); } -static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) +static bool ns_capable_common(struct user_namespace *ns, + int cap, + unsigned int opts) { int capable; @@ -372,8 +374,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) BUG(); } - capable = audit ? security_capable(current_cred(), ns, cap) : - security_capable_noaudit(current_cred(), ns, cap); + capable = security_capable(current_cred(), ns, cap, opts); if (capable == 0) { current->flags |= PF_SUPERPRIV; return true; @@ -394,7 +395,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit) */ bool ns_capable(struct user_namespace *ns, int cap) { - return ns_capable_common(ns, cap, true); + return ns_capable_common(ns, cap, CAP_OPT_NONE); } EXPORT_SYMBOL(ns_capable); @@ -412,7 +413,7 @@ EXPORT_SYMBOL(ns_capable); */ bool ns_capable_noaudit(struct user_namespace *ns, int cap) { - return ns_capable_common(ns, cap, false); + return ns_capable_common(ns, cap, CAP_OPT_NOAUDIT); } EXPORT_SYMBOL(ns_capable_noaudit); @@ -448,10 +449,11 @@ EXPORT_SYMBOL(capable); bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) { + if (WARN_ON_ONCE(!cap_valid(cap))) return false; - if (security_capable(file->f_cred, ns, cap) == 0) + if (security_capable(file->f_cred, ns, cap, CAP_OPT_NONE) == 0) return true; return false; @@ -500,10 +502,12 @@ bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) { int ret = 0; /* An absent tracer adds no restrictions */ const struct cred *cred; + rcu_read_lock(); cred = rcu_dereference(tsk->ptracer_cred); if (cred) - ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE, + CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d7f538847b84..38a77800def6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -443,8 +443,8 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) * behavior of privileged children. */ if (!task_no_new_privs(current) && - security_capable_noaudit(current_cred(), current_user_ns(), - CAP_SYS_ADMIN) != 0) + security_capable(current_cred(), current_user_ns(), + CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 253ef6e9d445..752f73980e30 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -110,13 +110,13 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, * profile_capable - test if profile allows use of capability @cap * @profile: profile being enforced (NOT NULL, NOT unconfined) * @cap: capability to test if allowed - * @audit: whether an audit record should be generated + * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated * @sa: audit data (MAY BE NULL indicating no auditing) * * Returns: 0 if allowed else -EPERM */ -static int profile_capable(struct aa_profile *profile, int cap, int audit, - struct common_audit_data *sa) +static int profile_capable(struct aa_profile *profile, int cap, + unsigned int opts, struct common_audit_data *sa) { int error; @@ -126,7 +126,7 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit, else error = -EPERM; - if (audit == SECURITY_CAP_NOAUDIT) { + if (opts & CAP_OPT_NOAUDIT) { if (!COMPLAIN_MODE(profile)) return error; /* audit the cap request in complain mode but note that it @@ -142,13 +142,13 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit, * aa_capable - test permission to use capability * @label: label being tested for capability (NOT NULL) * @cap: capability to be tested - * @audit: whether an audit record should be generated + * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated * * Look up capability in profile capability set. * * Returns: 0 on success, or else an error code. */ -int aa_capable(struct aa_label *label, int cap, int audit) +int aa_capable(struct aa_label *label, int cap, unsigned int opts) { struct aa_profile *profile; int error = 0; @@ -156,7 +156,7 @@ int aa_capable(struct aa_label *label, int cap, int audit) sa.u.cap = cap; error = fn_for_each_confined(label, profile, - profile_capable(profile, cap, audit, &sa)); + profile_capable(profile, cap, opts, &sa)); return error; } diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index e0304e2aeb7f..1b3663b6ab12 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -40,7 +40,7 @@ struct aa_caps { extern struct aa_sfs_entry aa_sfs_entry_caps[]; -int aa_capable(struct aa_label *label, int cap, int audit); +int aa_capable(struct aa_label *label, int cap, unsigned int opts); static inline void aa_free_cap_rules(struct aa_caps *caps) { diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 527ea1557120..aacd1e95cb59 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -107,7 +107,8 @@ static int profile_tracer_perm(struct aa_profile *tracer, aad(sa)->label = &tracer->label; aad(sa)->peer = tracee; aad(sa)->request = 0; - aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1); + aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, + CAP_OPT_NONE); return aa_audit(AUDIT_APPARMOR_AUTO, tracer, sa, audit_ptrace_cb); } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 60ef71268ccf..b6c395e2acd0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -172,14 +172,14 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, } static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit) + int cap, unsigned int opts) { struct aa_label *label; int error = 0; label = aa_get_newest_cred_label(cred); if (!unconfined(label)) - error = aa_capable(label, cap, audit); + error = aa_capable(label, cap, opts); aa_put_label(label); return error; diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 95fd26d09757..552ed09cb47e 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -124,7 +124,7 @@ int aa_task_setrlimit(struct aa_label *label, struct task_struct *task, */ if (label != peer && - aa_capable(label, CAP_SYS_RESOURCE, SECURITY_CAP_NOAUDIT) != 0) + aa_capable(label, CAP_SYS_RESOURCE, CAP_OPT_NOAUDIT) != 0) error = fn_for_each(label, profile, audit_resource(profile, resource, new_rlim->rlim_max, peer, diff --git a/security/commoncap.c b/security/commoncap.c index 52e04136bfa8..188eaf59f82f 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -68,7 +68,7 @@ static void warn_setuid_and_fcaps_mixed(const char *fname) * kernel's capable() and has_capability() returns 1 for this case. */ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, - int cap, int audit) + int cap, unsigned int opts) { struct user_namespace *ns = targ_ns; @@ -222,12 +222,11 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective, */ static inline int cap_inh_is_capped(void) { - /* they are so limited unless the current task has the CAP_SETPCAP * capability */ if (cap_capable(current_cred(), current_cred()->user_ns, - CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) + CAP_SETPCAP, CAP_OPT_NONE) == 0) return 0; return 1; } @@ -1208,8 +1207,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current_cred(), - current_cred()->user_ns, CAP_SETPCAP, - SECURITY_CAP_AUDIT) != 0) /*[4]*/ + current_cred()->user_ns, + CAP_SETPCAP, + CAP_OPT_NONE) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -1304,9 +1304,10 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; - if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, - SECURITY_CAP_NOAUDIT) == 0) + if (cap_capable(current_cred(), &init_user_ns, + CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0) cap_sys_admin = 1; + return cap_sys_admin; } @@ -1325,7 +1326,7 @@ int cap_mmap_addr(unsigned long addr) if (addr < dac_mmap_min_addr) { ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO, - SECURITY_CAP_AUDIT); + CAP_OPT_NONE); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ if (ret == 0) current->flags |= PF_SUPERPRIV; diff --git a/security/security.c b/security/security.c index 953fc3ea18a9..a618e22df5c6 100644 --- a/security/security.c +++ b/security/security.c @@ -689,16 +689,12 @@ int security_capset(struct cred *new, const struct cred *old, effective, inheritable, permitted); } -int security_capable(const struct cred *cred, struct user_namespace *ns, - int cap) +int security_capable(const struct cred *cred, + struct user_namespace *ns, + int cap, + unsigned int opts) { - return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_AUDIT); -} - -int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, - int cap) -{ - return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_NOAUDIT); + return call_int_hook(capable, 0, cred, ns, cap, opts); } int security_quotactl(int cmds, int type, int id, struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d98e1d8d18f6..b2ee49f938f1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1578,7 +1578,7 @@ static inline u32 signal_to_av(int sig) /* Check whether a task is allowed to use a capability. */ static int cred_has_capability(const struct cred *cred, - int cap, int audit, bool initns) + int cap, unsigned int opts, bool initns) { struct common_audit_data ad; struct av_decision avd; @@ -1605,7 +1605,7 @@ static int cred_has_capability(const struct cred *cred, rc = avc_has_perm_noaudit(&selinux_state, sid, sid, sclass, av, 0, &avd); - if (audit == SECURITY_CAP_AUDIT) { + if (!(opts & CAP_OPT_NOAUDIT)) { int rc2 = avc_audit(&selinux_state, sid, sid, sclass, av, &avd, rc, &ad, 0); if (rc2) @@ -2125,9 +2125,9 @@ static int selinux_capset(struct cred *new, const struct cred *old, */ static int selinux_capable(const struct cred *cred, struct user_namespace *ns, - int cap, int audit) + int cap, unsigned int opts) { - return cred_has_capability(cred, cap, audit, ns == &init_user_ns); + return cred_has_capability(cred, cap, opts, ns == &init_user_ns); } static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) @@ -2201,7 +2201,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) int rc, cap_sys_admin = 0; rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN, - SECURITY_CAP_NOAUDIT, true); + CAP_OPT_NOAUDIT, true); if (rc == 0) cap_sys_admin = 1; @@ -2988,11 +2988,11 @@ static int selinux_inode_getattr(const struct path *path) static bool has_cap_mac_admin(bool audit) { const struct cred *cred = current_cred(); - int cap_audit = audit ? SECURITY_CAP_AUDIT : SECURITY_CAP_NOAUDIT; + unsigned int opts = audit ? CAP_OPT_NONE : CAP_OPT_NOAUDIT; - if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, cap_audit)) + if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, opts)) return false; - if (cred_has_capability(cred, CAP_MAC_ADMIN, cap_audit, true)) + if (cred_has_capability(cred, CAP_MAC_ADMIN, opts, true)) return false; return true; } @@ -3387,7 +3387,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, case KDSKBENT: case KDSKBSENT: error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG, - SECURITY_CAP_AUDIT, true); + CAP_OPT_NONE, true); break; /* default case assumes that the command will go diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 489d49a20b47..fe2ce3a65822 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -640,7 +640,7 @@ bool smack_privileged_cred(int cap, const struct cred *cred) struct smack_known_list_elem *sklep; int rc; - rc = cap_capable(cred, &init_user_ns, cap, SECURITY_CAP_AUDIT); + rc = cap_capable(cred, &init_user_ns, cap, CAP_OPT_NONE); if (rc) return false; -- cgit v1.2.3 From bec9ba7f37631e794cbfaa4c2274074d631217a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Dec 2018 19:12:18 -0800 Subject: crypto: cipher - remove struct cipher_desc 'struct cipher_desc' is unused. Remove it. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 902ec171fc6d..c3c98a62e503 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -188,14 +188,6 @@ struct blkcipher_desc { u32 flags; }; -struct cipher_desc { - struct crypto_tfm *tfm; - void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, - const u8 *src, unsigned int nbytes); - void *info; -}; - /** * DOC: Block Cipher Algorithm Definitions * -- cgit v1.2.3 From 5b438f4ba315db4f8c1489d175656798d58c014f Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 11 Jan 2019 13:04:57 +0800 Subject: iommu/vt-d: Support page request in scalable mode VT-d Rev3.0 has made a few changes to the page request interface, 1. widened PRQ descriptor from 128 bits to 256 bits; 2. removed streaming response type; 3. introduced private data that requires page response even the request is not last request in group (LPIG). This is a supplement to commit 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") and makes the svm code compliant with VT-d Rev3.0. Cc: Ashok Raj Cc: Liu Yi L Cc: Kevin Tian Signed-off-by: Jacob Pan Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 77 ++++++++++++++++++++++++++------------------- include/linux/intel-iommu.h | 21 ++++++------- include/linux/intel-svm.h | 2 +- 3 files changed, 55 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a2a2aa4439aa..79add5716552 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -470,20 +470,31 @@ EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { - u64 srr:1; - u64 bof:1; - u64 pasid_present:1; - u64 lpig:1; - u64 pasid:20; - u64 bus:8; - u64 private:23; - u64 prg_index:9; - u64 rd_req:1; - u64 wr_req:1; - u64 exe_req:1; - u64 priv_req:1; - u64 devfn:8; - u64 addr:52; + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 priv_data_present:1; + u64 rsvd:6; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + u64 addr:52; + }; + u64 qw_1; + }; + u64 priv_data[2]; }; #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) @@ -596,7 +607,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* Accounting for major/minor faults? */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + if (sdev->sid == req->rid) break; } /* Other devices can go away, but the drivers are not permitted @@ -609,33 +620,35 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->priv_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + (req->exe_req << 1) | (req->pm_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, + req->priv_data, rwxp, result); } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; svm = NULL; no_pasid: - if (req->lpig) { - /* Page Group Response */ + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_DID(req->rid) | QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | QI_PGRP_RESP_TYPE; resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | - QI_PGRP_RESP_CODE(result); - } else if (req->srr) { - /* Page Stream Response */ - resp.qw0 = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | - QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | - QI_PSTRM_RESP_TYPE; - resp.qw1 = QI_PSTRM_ADDR(address) | - QI_PSTRM_DEVFN(req->devfn) | - QI_PSTRM_RESP_CODE(result); + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); } resp.qw2 = 0; resp.qw3 = 0; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0605f3bf6e79..fa364de9db18 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -374,20 +374,17 @@ enum { #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PGRP_RESP_CODE(res) ((u64)(res)) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) -#define QI_PGRP_DID(did) (((u64)(did)) << 16) +/* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) +#define QI_PGRP_PDP(p) (((u64)(p)) << 5) +#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) +#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) + +/* Page group response descriptor QW1 */ +#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) -#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) -#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) -#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) -#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) -#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) -#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) #define QI_RESP_SUCCESS 0x0 #define QI_RESP_INVALID 0x1 diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..e3f76315ca4d 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,7 +20,7 @@ struct device; struct svm_dev_ops { void (*fault_cb)(struct device *dev, int pasid, u64 address, - u32 private, int rwxp, int response); + void *private, int rwxp, int response); }; /* Values for rxwp in fault_cb callback */ -- cgit v1.2.3 From 19514910d021c93c7823ec32067e6b7dea224f0f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:19 +0100 Subject: livepatch: Change unsigned long old_addr -> void *old_func in struct klp_func The address of the to be patched function and new function is stored in struct klp_func as: void *new_func; unsigned long old_addr; The different naming scheme and type are derived from the way the addresses are set. @old_addr is assigned at runtime using kallsyms-based search. @new_func is statically initialized, for example: static struct klp_func funcs[] = { { .old_name = "cmdline_proc_show", .new_func = livepatch_cmdline_proc_show, }, { } }; This patch changes unsigned long old_addr -> void *old_func. It removes some confusion when these address are later used in the code. It is motivated by a followup patch that adds special NOP struct klp_func where we want to assign func->new_func = func->old_addr respectively func->new_func = func->old_func. This patch does not modify the existing behavior. Suggested-by: Josh Poimboeuf Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Joe Lawrence Acked-by: Alice Ferrazzi Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 4 ++-- kernel/livepatch/core.c | 6 +++--- kernel/livepatch/patch.c | 18 ++++++++++-------- kernel/livepatch/patch.h | 4 ++-- kernel/livepatch/transition.c | 4 ++-- 5 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index aec44b1d9582..634e13876380 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -40,7 +40,7 @@ * @new_func: pointer to the patched function code * @old_sympos: a hint indicating which symbol position the old function * can be found (optional) - * @old_addr: the address of the function being patched + * @old_func: pointer to the function being patched * @kobj: kobject for sysfs resources * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function @@ -77,7 +77,7 @@ struct klp_func { unsigned long old_sympos; /* internal */ - unsigned long old_addr; + void *old_func; struct kobject kobj; struct list_head stack_node; unsigned long old_size, new_size; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 5b77a7314e01..cb59c7fb94cb 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -648,7 +648,7 @@ static void klp_free_object_loaded(struct klp_object *obj) obj->mod = NULL; klp_for_each_func(obj, func) - func->old_addr = 0; + func->old_func = NULL; } /* @@ -721,11 +721,11 @@ static int klp_init_object_loaded(struct klp_patch *patch, klp_for_each_func(obj, func) { ret = klp_find_object_symbol(obj->name, func->old_name, func->old_sympos, - &func->old_addr); + (unsigned long *)&func->old_func); if (ret) return ret; - ret = kallsyms_lookup_size_offset(func->old_addr, + ret = kallsyms_lookup_size_offset((unsigned long)func->old_func, &func->old_size, NULL); if (!ret) { pr_err("kallsyms size lookup failed for '%s'\n", diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 7702cb4064fc..825022d70912 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -34,7 +34,7 @@ static LIST_HEAD(klp_ops); -struct klp_ops *klp_find_ops(unsigned long old_addr) +struct klp_ops *klp_find_ops(void *old_func) { struct klp_ops *ops; struct klp_func *func; @@ -42,7 +42,7 @@ struct klp_ops *klp_find_ops(unsigned long old_addr) list_for_each_entry(ops, &klp_ops, node) { func = list_first_entry(&ops->func_stack, struct klp_func, stack_node); - if (func->old_addr == old_addr) + if (func->old_func == old_func) return ops; } @@ -142,17 +142,18 @@ static void klp_unpatch_func(struct klp_func *func) if (WARN_ON(!func->patched)) return; - if (WARN_ON(!func->old_addr)) + if (WARN_ON(!func->old_func)) return; - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (WARN_ON(!ops)) return; if (list_is_singular(&ops->func_stack)) { unsigned long ftrace_loc; - ftrace_loc = klp_get_ftrace_location(func->old_addr); + ftrace_loc = + klp_get_ftrace_location((unsigned long)func->old_func); if (WARN_ON(!ftrace_loc)) return; @@ -174,17 +175,18 @@ static int klp_patch_func(struct klp_func *func) struct klp_ops *ops; int ret; - if (WARN_ON(!func->old_addr)) + if (WARN_ON(!func->old_func)) return -EINVAL; if (WARN_ON(func->patched)) return -EINVAL; - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (!ops) { unsigned long ftrace_loc; - ftrace_loc = klp_get_ftrace_location(func->old_addr); + ftrace_loc = + klp_get_ftrace_location((unsigned long)func->old_func); if (!ftrace_loc) { pr_err("failed to find location for function '%s'\n", func->old_name); diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h index e72d8250d04b..a9b16e513656 100644 --- a/kernel/livepatch/patch.h +++ b/kernel/livepatch/patch.h @@ -10,7 +10,7 @@ * struct klp_ops - structure for tracking registered ftrace ops structs * * A single ftrace_ops is shared between all enabled replacement functions - * (klp_func structs) which have the same old_addr. This allows the switch + * (klp_func structs) which have the same old_func. This allows the switch * between function versions to happen instantaneously by updating the klp_ops * struct's func_stack list. The winner is the klp_func at the top of the * func_stack (front of the list). @@ -25,7 +25,7 @@ struct klp_ops { struct ftrace_ops fops; }; -struct klp_ops *klp_find_ops(unsigned long old_addr); +struct klp_ops *klp_find_ops(void *old_func); int klp_patch_object(struct klp_object *obj); void klp_unpatch_object(struct klp_object *obj); diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 304d5eb8a98c..f27a378ad5e1 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -224,11 +224,11 @@ static int klp_check_stack_func(struct klp_func *func, * Check for the to-be-patched function * (the previous func). */ - ops = klp_find_ops(func->old_addr); + ops = klp_find_ops(func->old_func); if (list_is_singular(&ops->func_stack)) { /* original function */ - func_addr = func->old_addr; + func_addr = (unsigned long)func->old_func; func_size = func->old_size; } else { /* previously patched function */ -- cgit v1.2.3 From 0430f78bf38f9972f0cf0522709cc63d49fa164c Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:21 +0100 Subject: livepatch: Consolidate klp_free functions The code for freeing livepatch structures is a bit scattered and tricky: + direct calls to klp_free_*_limited() and kobject_put() are used to release partially initialized objects + klp_free_patch() removes the patch from the public list and releases all objects except for patch->kobj + object_put(&patch->kobj) and the related wait_for_completion() are called directly outside klp_mutex; this code is duplicated; Now, we are going to remove the registration stage to simplify the API and the code. This would require handling more situations in klp_enable_patch() error paths. More importantly, we are going to add a feature called atomic replace. It will need to dynamically create func and object structures. We will want to reuse the existing init() and free() functions. This would create even more error path scenarios. This patch implements more straightforward free functions: + checks kobj_added flag instead of @limit[*] + initializes patch->list early so that the check for empty list always works + The action(s) that has to be done outside klp_mutex are done in separate klp_free_patch_finish() function. It waits only when patch->kobj was really released via the _start() part. The patch does not change the existing behavior. [*] We need our own flag to track that the kobject was successfully added to the hierarchy. Note that kobj.state_initialized only indicates that kobject has been initialized, not whether is has been added (and needs to be removed on cleanup). Signed-off-by: Petr Mladek Cc: Josh Poimboeuf Cc: Miroslav Benes Cc: Jessica Yu Cc: Jiri Kosina Cc: Jason Baron Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 6 ++ kernel/livepatch/core.c | 137 +++++++++++++++++++++++++++++++--------------- 2 files changed, 98 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 634e13876380..6978785bc059 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -45,6 +45,7 @@ * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function + * @kobj_added: @kobj has been added and needs freeing * @patched: the func has been added to the klp_ops list * @transition: the func is currently being applied or reverted * @@ -81,6 +82,7 @@ struct klp_func { struct kobject kobj; struct list_head stack_node; unsigned long old_size, new_size; + bool kobj_added; bool patched; bool transition; }; @@ -117,6 +119,7 @@ struct klp_callbacks { * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object * (NULL for vmlinux) + * @kobj_added: @kobj has been added and needs freeing * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { @@ -128,6 +131,7 @@ struct klp_object { /* internal */ struct kobject kobj; struct module *mod; + bool kobj_added; bool patched; }; @@ -137,6 +141,7 @@ struct klp_object { * @objs: object entries for kernel objects to be patched * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources + * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @finish: for waiting till it is safe to remove the patch module */ @@ -148,6 +153,7 @@ struct klp_patch { /* internal */ struct list_head list; struct kobject kobj; + bool kobj_added; bool enabled; struct completion finish; }; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 20589da35194..6f0d9095f662 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -465,17 +465,15 @@ static struct kobj_type klp_ktype_func = { .sysfs_ops = &kobj_sysfs_ops, }; -/* - * Free all functions' kobjects in the array up to some limit. When limit is - * NULL, all kobjects are freed. - */ -static void klp_free_funcs_limited(struct klp_object *obj, - struct klp_func *limit) +static void klp_free_funcs(struct klp_object *obj) { struct klp_func *func; - for (func = obj->funcs; func->old_name && func != limit; func++) - kobject_put(&func->kobj); + klp_for_each_func(obj, func) { + /* Might be called from klp_init_patch() error path. */ + if (func->kobj_added) + kobject_put(&func->kobj); + } } /* Clean up when a patched object is unloaded */ @@ -489,30 +487,60 @@ static void klp_free_object_loaded(struct klp_object *obj) func->old_func = NULL; } -/* - * Free all objects' kobjects in the array up to some limit. When limit is - * NULL, all kobjects are freed. - */ -static void klp_free_objects_limited(struct klp_patch *patch, - struct klp_object *limit) +static void klp_free_objects(struct klp_patch *patch) { struct klp_object *obj; - for (obj = patch->objs; obj->funcs && obj != limit; obj++) { - klp_free_funcs_limited(obj, NULL); - kobject_put(&obj->kobj); + klp_for_each_object(patch, obj) { + klp_free_funcs(obj); + + /* Might be called from klp_init_patch() error path. */ + if (obj->kobj_added) + kobject_put(&obj->kobj); } } -static void klp_free_patch(struct klp_patch *patch) +/* + * This function implements the free operations that can be called safely + * under klp_mutex. + * + * The operation must be completed by calling klp_free_patch_finish() + * outside klp_mutex. + */ +static void klp_free_patch_start(struct klp_patch *patch) { - klp_free_objects_limited(patch, NULL); if (!list_empty(&patch->list)) list_del(&patch->list); + + klp_free_objects(patch); +} + +/* + * This function implements the free part that must be called outside + * klp_mutex. + * + * It must be called after klp_free_patch_start(). And it has to be + * the last function accessing the livepatch structures when the patch + * gets disabled. + */ +static void klp_free_patch_finish(struct klp_patch *patch) +{ + /* + * Avoid deadlock with enabled_store() sysfs callback by + * calling this outside klp_mutex. It is safe because + * this is called when the patch gets disabled and it + * cannot get enabled again. + */ + if (patch->kobj_added) { + kobject_put(&patch->kobj); + wait_for_completion(&patch->finish); + } } static int klp_init_func(struct klp_object *obj, struct klp_func *func) { + int ret; + if (!func->old_name || !func->new_func) return -EINVAL; @@ -528,9 +556,13 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) * object. If the user selects 0 for old_sympos, then 1 will be used * since a unique symbol will be the first occurrence. */ - return kobject_init_and_add(&func->kobj, &klp_ktype_func, - &obj->kobj, "%s,%lu", func->old_name, - func->old_sympos ? func->old_sympos : 1); + ret = kobject_init_and_add(&func->kobj, &klp_ktype_func, + &obj->kobj, "%s,%lu", func->old_name, + func->old_sympos ? func->old_sympos : 1); + if (!ret) + func->kobj_added = true; + + return ret; } /* Arches may override this to finish any remaining arch-specific tasks */ @@ -589,9 +621,6 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) int ret; const char *name; - if (!obj->funcs) - return -EINVAL; - if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN) return -EINVAL; @@ -605,46 +634,66 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) &patch->kobj, "%s", name); if (ret) return ret; + obj->kobj_added = true; klp_for_each_func(obj, func) { ret = klp_init_func(obj, func); if (ret) - goto free; + return ret; } - if (klp_is_object_loaded(obj)) { + if (klp_is_object_loaded(obj)) ret = klp_init_object_loaded(patch, obj); - if (ret) - goto free; - } - - return 0; -free: - klp_free_funcs_limited(obj, func); - kobject_put(&obj->kobj); return ret; } -static int klp_init_patch(struct klp_patch *patch) +static int klp_init_patch_early(struct klp_patch *patch) { struct klp_object *obj; - int ret; + struct klp_func *func; if (!patch->objs) return -EINVAL; - mutex_lock(&klp_mutex); - + INIT_LIST_HEAD(&patch->list); + patch->kobj_added = false; patch->enabled = false; init_completion(&patch->finish); + klp_for_each_object(patch, obj) { + if (!obj->funcs) + return -EINVAL; + + obj->kobj_added = false; + + klp_for_each_func(obj, func) + func->kobj_added = false; + } + + return 0; +} + +static int klp_init_patch(struct klp_patch *patch) +{ + struct klp_object *obj; + int ret; + + mutex_lock(&klp_mutex); + + ret = klp_init_patch_early(patch); + if (ret) { + mutex_unlock(&klp_mutex); + return ret; + } + ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, klp_root_kobj, "%s", patch->mod->name); if (ret) { mutex_unlock(&klp_mutex); return ret; } + patch->kobj_added = true; klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); @@ -659,12 +708,11 @@ static int klp_init_patch(struct klp_patch *patch) return 0; free: - klp_free_objects_limited(patch, obj); + klp_free_patch_start(patch); mutex_unlock(&klp_mutex); - kobject_put(&patch->kobj); - wait_for_completion(&patch->finish); + klp_free_patch_finish(patch); return ret; } @@ -693,12 +741,11 @@ int klp_unregister_patch(struct klp_patch *patch) goto err; } - klp_free_patch(patch); + klp_free_patch_start(patch); mutex_unlock(&klp_mutex); - kobject_put(&patch->kobj); - wait_for_completion(&patch->finish); + klp_free_patch_finish(patch); return 0; err: -- cgit v1.2.3 From 68007289bf3cd937a5b8fc4987d2787167bd06ca Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:22 +0100 Subject: livepatch: Don't block the removal of patches loaded after a forced transition module_put() is currently never called in klp_complete_transition() when klp_force is set. As a result, we might keep the reference count even when klp_enable_patch() fails and klp_cancel_transition() is called. This might give the impression that a module might get blocked in some strange init state. Fortunately, it is not the case. The reference count is ignored when mod->init fails and erroneous modules are always removed. Anyway, this might be confusing. Instead, this patch moves the global klp_forced flag into struct klp_patch. As a result, we block only modules that might still be in use after a forced transition. Newly loaded livepatches might be eventually completely removed later. It is not a big deal. But the code is at least consistent with the reality. Signed-off-by: Petr Mladek Acked-by: Joe Lawrence Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 2 ++ kernel/livepatch/core.c | 4 +++- kernel/livepatch/core.h | 1 + kernel/livepatch/transition.c | 10 +++++----- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 6978785bc059..6a9165d9b090 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -143,6 +143,7 @@ struct klp_object { * @kobj: kobject for sysfs resources * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) + * @forced: was involved in a forced transition * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { @@ -155,6 +156,7 @@ struct klp_patch { struct kobject kobj; bool kobj_added; bool enabled; + bool forced; struct completion finish; }; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 6f0d9095f662..e77c5017ae0c 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -45,7 +45,8 @@ */ DEFINE_MUTEX(klp_mutex); -static LIST_HEAD(klp_patches); +/* Registered patches */ +LIST_HEAD(klp_patches); static struct kobject *klp_root_kobj; @@ -659,6 +660,7 @@ static int klp_init_patch_early(struct klp_patch *patch) INIT_LIST_HEAD(&patch->list); patch->kobj_added = false; patch->enabled = false; + patch->forced = false; init_completion(&patch->finish); klp_for_each_object(patch, obj) { diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index 48a83d4364cf..d0cb5390e247 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -5,6 +5,7 @@ #include extern struct mutex klp_mutex; +extern struct list_head klp_patches; static inline bool klp_is_object_loaded(struct klp_object *obj) { diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index f27a378ad5e1..a4c921364003 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -33,8 +33,6 @@ struct klp_patch *klp_transition_patch; static int klp_target_state = KLP_UNDEFINED; -static bool klp_forced = false; - /* * This work can be performed periodically to finish patching or unpatching any * "straggler" tasks which failed to transition in the first attempt. @@ -137,10 +135,10 @@ static void klp_complete_transition(void) klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); /* - * klp_forced set implies unbounded increase of module's ref count if + * patch->forced set implies unbounded increase of module's ref count if * the module is disabled/enabled in a loop. */ - if (!klp_forced && klp_target_state == KLP_UNPATCHED) + if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED) module_put(klp_transition_patch->mod); klp_target_state = KLP_UNDEFINED; @@ -620,6 +618,7 @@ void klp_send_signals(void) */ void klp_force_transition(void) { + struct klp_patch *patch; struct task_struct *g, *task; unsigned int cpu; @@ -633,5 +632,6 @@ void klp_force_transition(void) for_each_possible_cpu(cpu) klp_update_patch_state(idle_task(cpu)); - klp_forced = true; + list_for_each_entry(patch, &klp_patches, list) + patch->forced = true; } -- cgit v1.2.3 From 958ef1e39d24d6cb8bf2a7406130a98c9564230f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Jan 2019 13:43:23 +0100 Subject: livepatch: Simplify API by removing registration step The possibility to re-enable a registered patch was useful for immediate patches where the livepatch module had to stay until the system reboot. The improved consistency model allows to achieve the same result by unloading and loading the livepatch module again. Also we are going to add a feature called atomic replace. It will allow to create a patch that would replace all already registered patches. The aim is to handle dependent patches more securely. It will obsolete the stack of patches that helped to handle the dependencies so far. Then it might be unclear when a cumulative patch re-enabling is safe. It would be complicated to support the many modes. Instead we could actually make the API and code easier to understand. Therefore, remove the two step public API. All the checks and init calls are moved from klp_register_patch() to klp_enabled_patch(). Also the patch is automatically freed, including the sysfs interface when the transition to the disabled state is completed. As a result, there is never a disabled patch on the top of the stack. Therefore we do not need to check the stack in __klp_enable_patch(). And we could simplify the check in __klp_disable_patch(). Also the API and logic is much easier. It is enough to call klp_enable_patch() in module_init() call. The patch can be disabled by writing '0' into /sys/kernel/livepatch//enabled. Then the module can be removed once the transition finishes and sysfs interface is freed. The only problem is how to free the structures and kobjects safely. The operation is triggered from the sysfs interface. We could not put the related kobject from there because it would cause lock inversion between klp_mutex and kernfs locks, see kn->count lockdep map. Therefore, offload the free task to a workqueue. It is perfectly fine: + The patch can no longer be used in the livepatch operations. + The module could not be removed until the free operation finishes and module_put() is called. + The operation is asynchronous already when the first klp_try_complete_transition() fails and another call is queued with a delay. Suggested-by: Josh Poimboeuf Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- Documentation/livepatch/livepatch.txt | 135 +++++-------- include/linux/livepatch.h | 7 +- kernel/livepatch/core.c | 280 +++++++++------------------ kernel/livepatch/core.h | 2 + kernel/livepatch/transition.c | 19 +- samples/livepatch/livepatch-callbacks-demo.c | 13 +- samples/livepatch/livepatch-sample.c | 13 +- samples/livepatch/livepatch-shadow-fix1.c | 14 +- samples/livepatch/livepatch-shadow-fix2.c | 14 +- 9 files changed, 168 insertions(+), 329 deletions(-) (limited to 'include/linux') diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index 2d7ed09dbd59..8f56490a4bb6 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -12,12 +12,11 @@ Table of Contents: 4. Livepatch module 4.1. New functions 4.2. Metadata - 4.3. Livepatch module handling 5. Livepatch life-cycle - 5.1. Registration + 5.1. Loading 5.2. Enabling 5.3. Disabling - 5.4. Unregistration + 5.4. Removing 6. Sysfs 7. Limitations @@ -298,117 +297,89 @@ into three levels: see the "Consistency model" section. -4.3. Livepatch module handling ------------------------------- - -The usual behavior is that the new functions will get used when -the livepatch module is loaded. For this, the module init() function -has to register the patch (struct klp_patch) and enable it. See the -section "Livepatch life-cycle" below for more details about these -two operations. - -Module removal is only safe when there are no users of the underlying -functions. This is the reason why the force feature permanently disables -the removal. The forced tasks entered the functions but we cannot say -that they returned back. Therefore it cannot be decided when the -livepatch module can be safely removed. When the system is successfully -transitioned to a new patch state (patched/unpatched) without being -forced it is guaranteed that no task sleeps or runs in the old code. - - 5. Livepatch life-cycle ======================= -Livepatching defines four basic operations that define the life cycle of each -live patch: registration, enabling, disabling and unregistration. There are -several reasons why it is done this way. - -First, the patch is applied only when all patched symbols for already -loaded objects are found. The error handling is much easier if this -check is done before particular functions get redirected. +Livepatching can be described by four basic operations: +loading, enabling, disabling, removing. -Second, it might take some time until the entire system is migrated with -the hybrid consistency model being used. The patch revert might block -the livepatch module removal for too long. Therefore it is useful to -revert the patch using a separate operation that might be called -explicitly. But it does not make sense to remove all information until -the livepatch module is really removed. +5.1. Loading +------------ -5.1. Registration ------------------ +The only reasonable way is to enable the patch when the livepatch kernel +module is being loaded. For this, klp_enable_patch() has to be called +in the module_init() callback. There are two main reasons: -Each patch first has to be registered using klp_register_patch(). This makes -the patch known to the livepatch framework. Also it does some preliminary -computing and checks. +First, only the module has an easy access to the related struct klp_patch. -In particular, the patch is added into the list of known patches. The -addresses of the patched functions are found according to their names. -The special relocations, mentioned in the section "New functions", are -applied. The relevant entries are created under -/sys/kernel/livepatch/. The patch is rejected when any operation -fails. +Second, the error code might be used to refuse loading the module when +the patch cannot get enabled. 5.2. Enabling ------------- -Registered patches might be enabled either by calling klp_enable_patch() or -by writing '1' to /sys/kernel/livepatch//enabled. The system will -start using the new implementation of the patched functions at this stage. +The livepatch gets enabled by calling klp_enable_patch() from +the module_init() callback. The system will start using the new +implementation of the patched functions at this stage. -When a patch is enabled, livepatch enters into a transition state where -tasks are converging to the patched state. This is indicated by a value -of '1' in /sys/kernel/livepatch//transition. Once all tasks have -been patched, the 'transition' value changes to '0'. For more -information about this process, see the "Consistency model" section. +First, the addresses of the patched functions are found according to their +names. The special relocations, mentioned in the section "New functions", +are applied. The relevant entries are created under +/sys/kernel/livepatch/. The patch is rejected when any above +operation fails. -If an original function is patched for the first time, a function -specific struct klp_ops is created and an universal ftrace handler is -registered. +Second, livepatch enters into a transition state where tasks are converging +to the patched state. If an original function is patched for the first +time, a function specific struct klp_ops is created and an universal +ftrace handler is registered[*]. This stage is indicated by a value of '1' +in /sys/kernel/livepatch//transition. For more information about +this process, see the "Consistency model" section. -Functions might be patched multiple times. The ftrace handler is registered -only once for the given function. Further patches just add an entry to the -list (see field `func_stack`) of the struct klp_ops. The last added -entry is chosen by the ftrace handler and becomes the active function -replacement. +Finally, once all tasks have been patched, the 'transition' value changes +to '0'. -Note that the patches might be enabled in a different order than they were -registered. +[*] Note that functions might be patched multiple times. The ftrace handler + is registered only once for a given function. Further patches just add + an entry to the list (see field `func_stack`) of the struct klp_ops. + The right implementation is selected by the ftrace handler, see + the "Consistency model" section. 5.3. Disabling -------------- -Enabled patches might get disabled either by calling klp_disable_patch() or -by writing '0' to /sys/kernel/livepatch//enabled. At this stage -either the code from the previously enabled patch or even the original -code gets used. +Enabled patches might get disabled by writing '0' to +/sys/kernel/livepatch//enabled. -When a patch is disabled, livepatch enters into a transition state where -tasks are converging to the unpatched state. This is indicated by a -value of '1' in /sys/kernel/livepatch//transition. Once all tasks -have been unpatched, the 'transition' value changes to '0'. For more -information about this process, see the "Consistency model" section. +First, livepatch enters into a transition state where tasks are converging +to the unpatched state. The system starts using either the code from +the previously enabled patch or even the original one. This stage is +indicated by a value of '1' in /sys/kernel/livepatch//transition. +For more information about this process, see the "Consistency model" +section. -Here all the functions (struct klp_func) associated with the to-be-disabled +Second, once all tasks have been unpatched, the 'transition' value changes +to '0'. All the functions (struct klp_func) associated with the to-be-disabled patch are removed from the corresponding struct klp_ops. The ftrace handler is unregistered and the struct klp_ops is freed when the func_stack list becomes empty. -Patches must be disabled in exactly the reverse order in which they were -enabled. It makes the problem and the implementation much easier. +Third, the sysfs interface is destroyed. +Note that patches must be disabled in exactly the reverse order in which +they were enabled. It makes the problem and the implementation much easier. -5.4. Unregistration -------------------- -Disabled patches might be unregistered by calling klp_unregister_patch(). -This can be done only when the patch is disabled and the code is no longer -used. It must be called before the livepatch module gets unloaded. +5.4. Removing +------------- -At this stage, all the relevant sys-fs entries are removed and the patch -is removed from the list of known patches. +Module removal is only safe when there are no users of functions provided +by the module. This is the reason why the force feature permanently +disables the removal. Only when the system is successfully transitioned +to a new patch state (patched/unpatched) without being forced it is +guaranteed that no task sleeps or runs in the old code. 6. Sysfs diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 6a9165d9b090..8f9c19c69744 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -139,11 +139,12 @@ struct klp_object { * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched - * @list: list node for global list of registered patches + * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @forced: was involved in a forced transition + * @free_work: patch cleanup from workqueue-context * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { @@ -157,6 +158,7 @@ struct klp_patch { bool kobj_added; bool enabled; bool forced; + struct work_struct free_work; struct completion finish; }; @@ -168,10 +170,7 @@ struct klp_patch { func->old_name || func->new_func || func->old_sympos; \ func++) -int klp_register_patch(struct klp_patch *); -int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); -int klp_disable_patch(struct klp_patch *); void arch_klp_init_object_loaded(struct klp_patch *patch, struct klp_object *obj); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index e77c5017ae0c..bd41b03a72d5 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -45,7 +45,11 @@ */ DEFINE_MUTEX(klp_mutex); -/* Registered patches */ +/* + * Actively used patches: enabled or in transition. Note that replaced + * or disabled patches are not listed even though the related kernel + * module still can be loaded. + */ LIST_HEAD(klp_patches); static struct kobject *klp_root_kobj; @@ -83,17 +87,6 @@ static void klp_find_object_module(struct klp_object *obj) mutex_unlock(&module_mutex); } -static bool klp_is_patch_registered(struct klp_patch *patch) -{ - struct klp_patch *mypatch; - - list_for_each_entry(mypatch, &klp_patches, list) - if (mypatch == patch) - return true; - - return false; -} - static bool klp_initialized(void) { return !!klp_root_kobj; @@ -292,7 +285,6 @@ static int klp_write_object_relocations(struct module *pmod, * /sys/kernel/livepatch/// */ static int __klp_disable_patch(struct klp_patch *patch); -static int __klp_enable_patch(struct klp_patch *patch); static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -309,40 +301,32 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, mutex_lock(&klp_mutex); - if (!klp_is_patch_registered(patch)) { - /* - * Module with the patch could either disappear meanwhile or is - * not properly initialized yet. - */ - ret = -EINVAL; - goto err; - } - if (patch->enabled == enabled) { /* already in requested state */ ret = -EINVAL; - goto err; + goto out; } - if (patch == klp_transition_patch) { + /* + * Allow to reverse a pending transition in both ways. It might be + * necessary to complete the transition without forcing and breaking + * the system integrity. + * + * Do not allow to re-enable a disabled patch. + */ + if (patch == klp_transition_patch) klp_reverse_transition(); - } else if (enabled) { - ret = __klp_enable_patch(patch); - if (ret) - goto err; - } else { + else if (!enabled) ret = __klp_disable_patch(patch); - if (ret) - goto err; - } + else + ret = -EINVAL; +out: mutex_unlock(&klp_mutex); + if (ret) + return ret; return count; - -err: - mutex_unlock(&klp_mutex); - return ret; } static ssize_t enabled_show(struct kobject *kobj, @@ -508,7 +492,7 @@ static void klp_free_objects(struct klp_patch *patch) * The operation must be completed by calling klp_free_patch_finish() * outside klp_mutex. */ -static void klp_free_patch_start(struct klp_patch *patch) +void klp_free_patch_start(struct klp_patch *patch) { if (!list_empty(&patch->list)) list_del(&patch->list); @@ -536,6 +520,23 @@ static void klp_free_patch_finish(struct klp_patch *patch) kobject_put(&patch->kobj); wait_for_completion(&patch->finish); } + + /* Put the module after the last access to struct klp_patch. */ + if (!patch->forced) + module_put(patch->mod); +} + +/* + * The livepatch might be freed from sysfs interface created by the patch. + * This work allows to wait until the interface is destroyed in a separate + * context. + */ +static void klp_free_patch_work_fn(struct work_struct *work) +{ + struct klp_patch *patch = + container_of(work, struct klp_patch, free_work); + + klp_free_patch_finish(patch); } static int klp_init_func(struct klp_object *obj, struct klp_func *func) @@ -661,6 +662,7 @@ static int klp_init_patch_early(struct klp_patch *patch) patch->kobj_added = false; patch->enabled = false; patch->forced = false; + INIT_WORK(&patch->free_work, klp_free_patch_work_fn); init_completion(&patch->finish); klp_for_each_object(patch, obj) { @@ -673,6 +675,9 @@ static int klp_init_patch_early(struct klp_patch *patch) func->kobj_added = false; } + if (!try_module_get(patch->mod)) + return -ENODEV; + return 0; } @@ -681,115 +686,22 @@ static int klp_init_patch(struct klp_patch *patch) struct klp_object *obj; int ret; - mutex_lock(&klp_mutex); - - ret = klp_init_patch_early(patch); - if (ret) { - mutex_unlock(&klp_mutex); - return ret; - } - ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, klp_root_kobj, "%s", patch->mod->name); - if (ret) { - mutex_unlock(&klp_mutex); + if (ret) return ret; - } patch->kobj_added = true; klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); if (ret) - goto free; + return ret; } list_add_tail(&patch->list, &klp_patches); - mutex_unlock(&klp_mutex); - - return 0; - -free: - klp_free_patch_start(patch); - - mutex_unlock(&klp_mutex); - - klp_free_patch_finish(patch); - - return ret; -} - -/** - * klp_unregister_patch() - unregisters a patch - * @patch: Disabled patch to be unregistered - * - * Frees the data structures and removes the sysfs interface. - * - * Return: 0 on success, otherwise error - */ -int klp_unregister_patch(struct klp_patch *patch) -{ - int ret; - - mutex_lock(&klp_mutex); - - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; - } - - if (patch->enabled) { - ret = -EBUSY; - goto err; - } - - klp_free_patch_start(patch); - - mutex_unlock(&klp_mutex); - - klp_free_patch_finish(patch); - return 0; -err: - mutex_unlock(&klp_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(klp_unregister_patch); - -/** - * klp_register_patch() - registers a patch - * @patch: Patch to be registered - * - * Initializes the data structure associated with the patch and - * creates the sysfs interface. - * - * There is no need to take the reference on the patch module here. It is done - * later when the patch is enabled. - * - * Return: 0 on success, otherwise error - */ -int klp_register_patch(struct klp_patch *patch) -{ - if (!patch || !patch->mod) - return -EINVAL; - - if (!is_livepatch_module(patch->mod)) { - pr_err("module %s is not marked as a livepatch module\n", - patch->mod->name); - return -EINVAL; - } - - if (!klp_initialized()) - return -ENODEV; - - if (!klp_have_reliable_stack()) { - pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); - return -ENOSYS; - } - - return klp_init_patch(patch); } -EXPORT_SYMBOL_GPL(klp_register_patch); static int __klp_disable_patch(struct klp_patch *patch) { @@ -802,8 +714,7 @@ static int __klp_disable_patch(struct klp_patch *patch) return -EBUSY; /* enforce stacking: only the last enabled patch can be disabled */ - if (!list_is_last(&patch->list, &klp_patches) && - list_next_entry(patch, list)->enabled) + if (!list_is_last(&patch->list, &klp_patches)) return -EBUSY; klp_init_transition(patch, KLP_UNPATCHED); @@ -822,44 +733,12 @@ static int __klp_disable_patch(struct klp_patch *patch) smp_wmb(); klp_start_transition(); - klp_try_complete_transition(); patch->enabled = false; + klp_try_complete_transition(); return 0; } -/** - * klp_disable_patch() - disables a registered patch - * @patch: The registered, enabled patch to be disabled - * - * Unregisters the patched functions from ftrace. - * - * Return: 0 on success, otherwise error - */ -int klp_disable_patch(struct klp_patch *patch) -{ - int ret; - - mutex_lock(&klp_mutex); - - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; - } - - if (!patch->enabled) { - ret = -EINVAL; - goto err; - } - - ret = __klp_disable_patch(patch); - -err: - mutex_unlock(&klp_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(klp_disable_patch); - static int __klp_enable_patch(struct klp_patch *patch) { struct klp_object *obj; @@ -871,17 +750,8 @@ static int __klp_enable_patch(struct klp_patch *patch) if (WARN_ON(patch->enabled)) return -EINVAL; - /* enforce stacking: only the first disabled patch can be enabled */ - if (patch->list.prev != &klp_patches && - !list_prev_entry(patch, list)->enabled) - return -EBUSY; - - /* - * A reference is taken on the patch module to prevent it from being - * unloaded. - */ - if (!try_module_get(patch->mod)) - return -ENODEV; + if (!patch->kobj_added) + return -EINVAL; pr_notice("enabling patch '%s'\n", patch->mod->name); @@ -916,8 +786,8 @@ static int __klp_enable_patch(struct klp_patch *patch) } klp_start_transition(); - klp_try_complete_transition(); patch->enabled = true; + klp_try_complete_transition(); return 0; err: @@ -928,11 +798,15 @@ err: } /** - * klp_enable_patch() - enables a registered patch - * @patch: The registered, disabled patch to be enabled + * klp_enable_patch() - enable the livepatch + * @patch: patch to be enabled * - * Performs the needed symbol lookups and code relocations, - * then registers the patched functions with ftrace. + * Initializes the data structure associated with the patch, creates the sysfs + * interface, performs the needed symbol lookups and code relocations, + * registers the patched functions with ftrace. + * + * This function is supposed to be called from the livepatch module_init() + * callback. * * Return: 0 on success, otherwise error */ @@ -940,17 +814,51 @@ int klp_enable_patch(struct klp_patch *patch) { int ret; + if (!patch || !patch->mod) + return -EINVAL; + + if (!is_livepatch_module(patch->mod)) { + pr_err("module %s is not marked as a livepatch module\n", + patch->mod->name); + return -EINVAL; + } + + if (!klp_initialized()) + return -ENODEV; + + if (!klp_have_reliable_stack()) { + pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); + return -ENOSYS; + } + + mutex_lock(&klp_mutex); - if (!klp_is_patch_registered(patch)) { - ret = -EINVAL; - goto err; + ret = klp_init_patch_early(patch); + if (ret) { + mutex_unlock(&klp_mutex); + return ret; } + ret = klp_init_patch(patch); + if (ret) + goto err; + ret = __klp_enable_patch(patch); + if (ret) + goto err; + + mutex_unlock(&klp_mutex); + + return 0; err: + klp_free_patch_start(patch); + mutex_unlock(&klp_mutex); + + klp_free_patch_finish(patch); + return ret; } EXPORT_SYMBOL_GPL(klp_enable_patch); diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index d0cb5390e247..d4eefc520c08 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -7,6 +7,8 @@ extern struct mutex klp_mutex; extern struct list_head klp_patches; +void klp_free_patch_start(struct klp_patch *patch); + static inline bool klp_is_object_loaded(struct klp_object *obj) { return !obj->name || obj->mod; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index a4c921364003..c9917a24b3a4 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -134,13 +134,6 @@ static void klp_complete_transition(void) pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name, klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); - /* - * patch->forced set implies unbounded increase of module's ref count if - * the module is disabled/enabled in a loop. - */ - if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED) - module_put(klp_transition_patch->mod); - klp_target_state = KLP_UNDEFINED; klp_transition_patch = NULL; } @@ -357,6 +350,7 @@ void klp_try_complete_transition(void) { unsigned int cpu; struct task_struct *g, *task; + struct klp_patch *patch; bool complete = true; WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED); @@ -405,7 +399,18 @@ void klp_try_complete_transition(void) } /* we're done, now cleanup the data structures */ + patch = klp_transition_patch; klp_complete_transition(); + + /* + * It would make more sense to free the patch in + * klp_complete_transition() but it is called also + * from klp_cancel_transition(). + */ + if (!patch->enabled) { + klp_free_patch_start(patch); + schedule_work(&patch->free_work); + } } /* diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 72f9e6d1387b..62d97953ad02 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -195,22 +195,11 @@ static struct klp_patch patch = { static int livepatch_callbacks_demo_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_callbacks_demo_exit(void) { - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_callbacks_demo_init); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index 2d554dd930e2..01c9cf003ca2 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -69,22 +69,11 @@ static struct klp_patch patch = { static int livepatch_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_exit(void) { - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_init); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index e8f1bd6b29b1..a5a5cac21d4d 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -157,25 +157,13 @@ static struct klp_patch patch = { static int livepatch_shadow_fix1_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_shadow_fix1_exit(void) { /* Cleanup any existing SV_LEAK shadow variables */ klp_shadow_free_all(SV_LEAK, livepatch_fix1_dummy_leak_dtor); - - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_shadow_fix1_init); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index b34c7bf83356..52de947b5526 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -129,25 +129,13 @@ static struct klp_patch patch = { static int livepatch_shadow_fix2_init(void) { - int ret; - - ret = klp_register_patch(&patch); - if (ret) - return ret; - ret = klp_enable_patch(&patch); - if (ret) { - WARN_ON(klp_unregister_patch(&patch)); - return ret; - } - return 0; + return klp_enable_patch(&patch); } static void livepatch_shadow_fix2_exit(void) { /* Cleanup any existing SV_COUNTER shadow variables */ klp_shadow_free_all(SV_COUNTER, NULL); - - WARN_ON(klp_unregister_patch(&patch)); } module_init(livepatch_shadow_fix2_init); -- cgit v1.2.3 From 20e55025958e18e671d92c7adea00c301ac93c43 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 9 Jan 2019 13:43:24 +0100 Subject: livepatch: Use lists to manage patches, objects and functions Currently klp_patch contains a pointer to a statically allocated array of struct klp_object and struct klp_objects contains a pointer to a statically allocated array of klp_func. In order to allow for the dynamic allocation of objects and functions, link klp_patch, klp_object, and klp_func together via linked lists. This allows us to more easily allocate new objects and functions, while having the iterator be a simple linked list walk. The static structures are added to the lists early. It allows to add the dynamically allocated objects before klp_init_object() and klp_init_func() calls. Therefore it reduces the further changes to the code. This patch does not change the existing behavior. Signed-off-by: Jason Baron [pmladek@suse.com: Initialize lists before init calls] Signed-off-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Joe Lawrence Cc: Josh Poimboeuf Cc: Jiri Kosina Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 19 +++++++++++++++++-- kernel/livepatch/core.c | 9 +++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 8f9c19c69744..e117e20ff771 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -24,6 +24,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_LIVEPATCH) @@ -42,6 +43,7 @@ * can be found (optional) * @old_func: pointer to the function being patched * @kobj: kobject for sysfs resources + * @node: list node for klp_object func_list * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function @@ -80,6 +82,7 @@ struct klp_func { /* internal */ void *old_func; struct kobject kobj; + struct list_head node; struct list_head stack_node; unsigned long old_size, new_size; bool kobj_added; @@ -117,6 +120,8 @@ struct klp_callbacks { * @funcs: function entries for functions to be patched in the object * @callbacks: functions to be executed pre/post (un)patching * @kobj: kobject for sysfs resources + * @func_list: dynamic list of the function entries + * @node: list node for klp_patch obj_list * @mod: kernel module associated with the patched object * (NULL for vmlinux) * @kobj_added: @kobj has been added and needs freeing @@ -130,6 +135,8 @@ struct klp_object { /* internal */ struct kobject kobj; + struct list_head func_list; + struct list_head node; struct module *mod; bool kobj_added; bool patched; @@ -141,6 +148,7 @@ struct klp_object { * @objs: object entries for kernel objects to be patched * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources + * @obj_list: dynamic list of the object entries * @kobj_added: @kobj has been added and needs freeing * @enabled: the patch is enabled (but operation may be incomplete) * @forced: was involved in a forced transition @@ -155,6 +163,7 @@ struct klp_patch { /* internal */ struct list_head list; struct kobject kobj; + struct list_head obj_list; bool kobj_added; bool enabled; bool forced; @@ -162,14 +171,20 @@ struct klp_patch { struct completion finish; }; -#define klp_for_each_object(patch, obj) \ +#define klp_for_each_object_static(patch, obj) \ for (obj = patch->objs; obj->funcs || obj->name; obj++) -#define klp_for_each_func(obj, func) \ +#define klp_for_each_object(patch, obj) \ + list_for_each_entry(obj, &patch->obj_list, node) + +#define klp_for_each_func_static(obj, func) \ for (func = obj->funcs; \ func->old_name || func->new_func || func->old_sympos; \ func++) +#define klp_for_each_func(obj, func) \ + list_for_each_entry(func, &obj->func_list, node) + int klp_enable_patch(struct klp_patch *); void arch_klp_init_object_loaded(struct klp_patch *patch, diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bd41b03a72d5..37d0d3645fa6 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -659,20 +659,25 @@ static int klp_init_patch_early(struct klp_patch *patch) return -EINVAL; INIT_LIST_HEAD(&patch->list); + INIT_LIST_HEAD(&patch->obj_list); patch->kobj_added = false; patch->enabled = false; patch->forced = false; INIT_WORK(&patch->free_work, klp_free_patch_work_fn); init_completion(&patch->finish); - klp_for_each_object(patch, obj) { + klp_for_each_object_static(patch, obj) { if (!obj->funcs) return -EINVAL; + INIT_LIST_HEAD(&obj->func_list); obj->kobj_added = false; + list_add_tail(&obj->node, &patch->obj_list); - klp_for_each_func(obj, func) + klp_for_each_func_static(obj, func) { func->kobj_added = false; + list_add_tail(&func->node, &obj->func_list); + } } if (!try_module_get(patch->mod)) -- cgit v1.2.3 From e1452b607c48c642caf57299f4da83aa002f8533 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 9 Jan 2019 13:43:25 +0100 Subject: livepatch: Add atomic replace Sometimes we would like to revert a particular fix. Currently, this is not easy because we want to keep all other fixes active and we could revert only the last applied patch. One solution would be to apply new patch that implemented all the reverted functions like in the original code. It would work as expected but there will be unnecessary redirections. In addition, it would also require knowing which functions need to be reverted at build time. Another problem is when there are many patches that touch the same functions. There might be dependencies between patches that are not enforced on the kernel side. Also it might be pretty hard to actually prepare the patch and ensure compatibility with the other patches. Atomic replace && cumulative patches: A better solution would be to create cumulative patch and say that it replaces all older ones. This patch adds a new "replace" flag to struct klp_patch. When it is enabled, a set of 'nop' klp_func will be dynamically created for all functions that are already being patched but that will no longer be modified by the new patch. They are used as a new target during the patch transition. The idea is to handle Nops' structures like the static ones. When the dynamic structures are allocated, we initialize all values that are normally statically defined. The only exception is "new_func" in struct klp_func. It has to point to the original function and the address is known only when the object (module) is loaded. Note that we really need to set it. The address is used, for example, in klp_check_stack_func(). Nevertheless we still need to distinguish the dynamically allocated structures in some operations. For this, we add "nop" flag into struct klp_func and "dynamic" flag into struct klp_object. They need special handling in the following situations: + The structures are added into the lists of objects and functions immediately. In fact, the lists were created for this purpose. + The address of the original function is known only when the patched object (module) is loaded. Therefore it is copied later in klp_init_object_loaded(). + The ftrace handler must not set PC to func->new_func. It would cause infinite loop because the address points back to the beginning of the original function. + The various free() functions must free the structure itself. Note that other ways to detect the dynamic structures are not considered safe. For example, even the statically defined struct klp_object might include empty funcs array. It might be there just to run some callbacks. Also note that the safe iterator must be used in the free() functions. Otherwise already freed structures might get accessed. Special callbacks handling: The callbacks from the replaced patches are _not_ called by intention. It would be pretty hard to define a reasonable semantic and implement it. It might even be counter-productive. The new patch is cumulative. It is supposed to include most of the changes from older patches. In most cases, it will not want to call pre_unpatch() post_unpatch() callbacks from the replaced patches. It would disable/break things for no good reasons. Also it should be easier to handle various scenarios in a single script in the new patch than think about interactions caused by running many scripts from older patches. Not to say that the old scripts even would not expect to be called in this situation. Removing replaced patches: One nice effect of the cumulative patches is that the code from the older patches is no longer used. Therefore the replaced patches can be removed. It has several advantages: + Nops' structs will no longer be necessary and might be removed. This would save memory, restore performance (no ftrace handler), allow clear view on what is really patched. + Disabling the patch will cause using the original code everywhere. Therefore the livepatch callbacks could handle only one scenario. Note that the complication is already complex enough when the patch gets enabled. It is currently solved by calling callbacks only from the new cumulative patch. + The state is clean in both the sysfs interface and lsmod. The modules with the replaced livepatches might even get removed from the system. Some people actually expected this behavior from the beginning. After all a cumulative patch is supposed to "completely" replace an existing one. It is like when a new version of an application replaces an older one. This patch does the first step. It removes the replaced patches from the list of patches. It is safe. The consistency model ensures that they are no longer used. By other words, each process works only with the structures from klp_transition_patch. The removal is done by a special function. It combines actions done by __disable_patch() and klp_complete_transition(). But it is a fast track without all the transaction-related stuff. Signed-off-by: Jason Baron [pmladek@suse.com: Split, reuse existing code, simplified] Signed-off-by: Petr Mladek Cc: Josh Poimboeuf Cc: Jessica Yu Cc: Jiri Kosina Cc: Miroslav Benes Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- Documentation/livepatch/livepatch.txt | 31 ++++- include/linux/livepatch.h | 12 ++ kernel/livepatch/core.c | 232 ++++++++++++++++++++++++++++++++-- kernel/livepatch/core.h | 1 + kernel/livepatch/patch.c | 8 ++ kernel/livepatch/transition.c | 3 + 6 files changed, 273 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index 8f56490a4bb6..2a70f43166f6 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -15,8 +15,9 @@ Table of Contents: 5. Livepatch life-cycle 5.1. Loading 5.2. Enabling - 5.3. Disabling - 5.4. Removing + 5.3. Replacing + 5.4. Disabling + 5.5. Removing 6. Sysfs 7. Limitations @@ -300,8 +301,12 @@ into three levels: 5. Livepatch life-cycle ======================= -Livepatching can be described by four basic operations: -loading, enabling, disabling, removing. +Livepatching can be described by five basic operations: +loading, enabling, replacing, disabling, removing. + +Where the replacing and the disabling operations are mutually +exclusive. They have the same result for the given patch but +not for the system. 5.1. Loading @@ -347,7 +352,21 @@ to '0'. the "Consistency model" section. -5.3. Disabling +5.3. Replacing +-------------- + +All enabled patches might get replaced by a cumulative patch that +has the .replace flag set. + +Once the new patch is enabled and the 'transition' finishes then +all the functions (struct klp_func) associated with the replaced +patches are removed from the corresponding struct klp_ops. Also +the ftrace handler is unregistered and the struct klp_ops is +freed when the related function is not modified by the new patch +and func_stack list becomes empty. + + +5.4. Disabling -------------- Enabled patches might get disabled by writing '0' to @@ -372,7 +391,7 @@ Note that patches must be disabled in exactly the reverse order in which they were enabled. It makes the problem and the implementation much easier. -5.4. Removing +5.5. Removing ------------- Module removal is only safe when there are no users of functions provided diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index e117e20ff771..53551f470722 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -48,6 +48,7 @@ * @old_size: size of the old function * @new_size: size of the new function * @kobj_added: @kobj has been added and needs freeing + * @nop: temporary patch to use the original code again; dyn. allocated * @patched: the func has been added to the klp_ops list * @transition: the func is currently being applied or reverted * @@ -86,6 +87,7 @@ struct klp_func { struct list_head stack_node; unsigned long old_size, new_size; bool kobj_added; + bool nop; bool patched; bool transition; }; @@ -125,6 +127,7 @@ struct klp_callbacks { * @mod: kernel module associated with the patched object * (NULL for vmlinux) * @kobj_added: @kobj has been added and needs freeing + * @dynamic: temporary object for nop functions; dynamically allocated * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { @@ -139,6 +142,7 @@ struct klp_object { struct list_head node; struct module *mod; bool kobj_added; + bool dynamic; bool patched; }; @@ -146,6 +150,7 @@ struct klp_object { * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched + * @replace: replace all actively used patches * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources * @obj_list: dynamic list of the object entries @@ -159,6 +164,7 @@ struct klp_patch { /* external */ struct module *mod; struct klp_object *objs; + bool replace; /* internal */ struct list_head list; @@ -174,6 +180,9 @@ struct klp_patch { #define klp_for_each_object_static(patch, obj) \ for (obj = patch->objs; obj->funcs || obj->name; obj++) +#define klp_for_each_object_safe(patch, obj, tmp_obj) \ + list_for_each_entry_safe(obj, tmp_obj, &patch->obj_list, node) + #define klp_for_each_object(patch, obj) \ list_for_each_entry(obj, &patch->obj_list, node) @@ -182,6 +191,9 @@ struct klp_patch { func->old_name || func->new_func || func->old_sympos; \ func++) +#define klp_for_each_func_safe(obj, func, tmp_func) \ + list_for_each_entry_safe(func, tmp_func, &obj->func_list, node) + #define klp_for_each_func(obj, func) \ list_for_each_entry(func, &obj->func_list, node) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 37d0d3645fa6..ecb7660f1d8b 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -92,6 +92,40 @@ static bool klp_initialized(void) return !!klp_root_kobj; } +static struct klp_func *klp_find_func(struct klp_object *obj, + struct klp_func *old_func) +{ + struct klp_func *func; + + klp_for_each_func(obj, func) { + if ((strcmp(old_func->old_name, func->old_name) == 0) && + (old_func->old_sympos == func->old_sympos)) { + return func; + } + } + + return NULL; +} + +static struct klp_object *klp_find_object(struct klp_patch *patch, + struct klp_object *old_obj) +{ + struct klp_object *obj; + + klp_for_each_object(patch, obj) { + if (klp_is_module(old_obj)) { + if (klp_is_module(obj) && + strcmp(old_obj->name, obj->name) == 0) { + return obj; + } + } else if (!klp_is_module(obj)) { + return obj; + } + } + + return NULL; +} + struct klp_find_arg { const char *objname; const char *name; @@ -418,6 +452,121 @@ static struct attribute *klp_patch_attrs[] = { NULL }; +static void klp_free_object_dynamic(struct klp_object *obj) +{ + kfree(obj->name); + kfree(obj); +} + +static struct klp_object *klp_alloc_object_dynamic(const char *name) +{ + struct klp_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + if (name) { + obj->name = kstrdup(name, GFP_KERNEL); + if (!obj->name) { + kfree(obj); + return NULL; + } + } + + INIT_LIST_HEAD(&obj->func_list); + obj->dynamic = true; + + return obj; +} + +static void klp_free_func_nop(struct klp_func *func) +{ + kfree(func->old_name); + kfree(func); +} + +static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func, + struct klp_object *obj) +{ + struct klp_func *func; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return NULL; + + if (old_func->old_name) { + func->old_name = kstrdup(old_func->old_name, GFP_KERNEL); + if (!func->old_name) { + kfree(func); + return NULL; + } + } + + /* + * func->new_func is same as func->old_func. These addresses are + * set when the object is loaded, see klp_init_object_loaded(). + */ + func->old_sympos = old_func->old_sympos; + func->nop = true; + + return func; +} + +static int klp_add_object_nops(struct klp_patch *patch, + struct klp_object *old_obj) +{ + struct klp_object *obj; + struct klp_func *func, *old_func; + + obj = klp_find_object(patch, old_obj); + + if (!obj) { + obj = klp_alloc_object_dynamic(old_obj->name); + if (!obj) + return -ENOMEM; + + list_add_tail(&obj->node, &patch->obj_list); + } + + klp_for_each_func(old_obj, old_func) { + func = klp_find_func(obj, old_func); + if (func) + continue; + + func = klp_alloc_func_nop(old_func, obj); + if (!func) + return -ENOMEM; + + list_add_tail(&func->node, &obj->func_list); + } + + return 0; +} + +/* + * Add 'nop' functions which simply return to the caller to run + * the original function. The 'nop' functions are added to a + * patch to facilitate a 'replace' mode. + */ +static int klp_add_nops(struct klp_patch *patch) +{ + struct klp_patch *old_patch; + struct klp_object *old_obj; + + list_for_each_entry(old_patch, &klp_patches, list) { + klp_for_each_object(old_patch, old_obj) { + int err; + + err = klp_add_object_nops(patch, old_obj); + if (err) + return err; + } + } + + return 0; +} + static void klp_kobj_release_patch(struct kobject *kobj) { struct klp_patch *patch; @@ -434,6 +583,12 @@ static struct kobj_type klp_ktype_patch = { static void klp_kobj_release_object(struct kobject *kobj) { + struct klp_object *obj; + + obj = container_of(kobj, struct klp_object, kobj); + + if (obj->dynamic) + klp_free_object_dynamic(obj); } static struct kobj_type klp_ktype_object = { @@ -443,6 +598,12 @@ static struct kobj_type klp_ktype_object = { static void klp_kobj_release_func(struct kobject *kobj) { + struct klp_func *func; + + func = container_of(kobj, struct klp_func, kobj); + + if (func->nop) + klp_free_func_nop(func); } static struct kobj_type klp_ktype_func = { @@ -452,12 +613,15 @@ static struct kobj_type klp_ktype_func = { static void klp_free_funcs(struct klp_object *obj) { - struct klp_func *func; + struct klp_func *func, *tmp_func; - klp_for_each_func(obj, func) { + klp_for_each_func_safe(obj, func, tmp_func) { /* Might be called from klp_init_patch() error path. */ - if (func->kobj_added) + if (func->kobj_added) { kobject_put(&func->kobj); + } else if (func->nop) { + klp_free_func_nop(func); + } } } @@ -468,20 +632,27 @@ static void klp_free_object_loaded(struct klp_object *obj) obj->mod = NULL; - klp_for_each_func(obj, func) + klp_for_each_func(obj, func) { func->old_func = NULL; + + if (func->nop) + func->new_func = NULL; + } } static void klp_free_objects(struct klp_patch *patch) { - struct klp_object *obj; + struct klp_object *obj, *tmp_obj; - klp_for_each_object(patch, obj) { + klp_for_each_object_safe(patch, obj, tmp_obj) { klp_free_funcs(obj); /* Might be called from klp_init_patch() error path. */ - if (obj->kobj_added) + if (obj->kobj_added) { kobject_put(&obj->kobj); + } else if (obj->dynamic) { + klp_free_object_dynamic(obj); + } } } @@ -543,7 +714,14 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) { int ret; - if (!func->old_name || !func->new_func) + if (!func->old_name) + return -EINVAL; + + /* + * NOPs get the address later. The patched module must be loaded, + * see klp_init_object_loaded(). + */ + if (!func->new_func && !func->nop) return -EINVAL; if (strlen(func->old_name) >= KSYM_NAME_LEN) @@ -605,6 +783,9 @@ static int klp_init_object_loaded(struct klp_patch *patch, return -ENOENT; } + if (func->nop) + func->new_func = func->old_func; + ret = kallsyms_lookup_size_offset((unsigned long)func->new_func, &func->new_size, NULL); if (!ret) { @@ -697,6 +878,12 @@ static int klp_init_patch(struct klp_patch *patch) return ret; patch->kobj_added = true; + if (patch->replace) { + ret = klp_add_nops(patch); + if (ret) + return ret; + } + klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); if (ret) @@ -868,6 +1055,35 @@ err: } EXPORT_SYMBOL_GPL(klp_enable_patch); +/* + * This function removes replaced patches. + * + * We could be pretty aggressive here. It is called in the situation where + * these structures are no longer accessible. All functions are redirected + * by the klp_transition_patch. They use either a new code or they are in + * the original code because of the special nop function patches. + * + * The only exception is when the transition was forced. In this case, + * klp_ftrace_handler() might still see the replaced patch on the stack. + * Fortunately, it is carefully designed to work with removed functions + * thanks to RCU. We only have to keep the patches on the system. Also + * this is handled transparently by patch->module_put. + */ +void klp_discard_replaced_patches(struct klp_patch *new_patch) +{ + struct klp_patch *old_patch, *tmp_patch; + + list_for_each_entry_safe(old_patch, tmp_patch, &klp_patches, list) { + if (old_patch == new_patch) + return; + + old_patch->enabled = false; + klp_unpatch_objects(old_patch); + klp_free_patch_start(old_patch); + schedule_work(&old_patch->free_work); + } +} + /* * Remove parts of patches that touch a given kernel module. The list of * patches processed might be limited. When limit is NULL, all patches diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h index d4eefc520c08..f6a853adcc00 100644 --- a/kernel/livepatch/core.h +++ b/kernel/livepatch/core.h @@ -8,6 +8,7 @@ extern struct mutex klp_mutex; extern struct list_head klp_patches; void klp_free_patch_start(struct klp_patch *patch); +void klp_discard_replaced_patches(struct klp_patch *new_patch); static inline bool klp_is_object_loaded(struct klp_object *obj) { diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 825022d70912..0ff466ab4b5a 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -118,7 +118,15 @@ static void notrace klp_ftrace_handler(unsigned long ip, } } + /* + * NOPs are used to replace existing patches with original code. + * Do nothing! Setting pc would cause an infinite loop. + */ + if (func->nop) + goto unlock; + klp_arch_set_pc(regs, (unsigned long)func->new_func); + unlock: preempt_enable_notrace(); } diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index c9917a24b3a4..f4c5908a9731 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -85,6 +85,9 @@ static void klp_complete_transition(void) klp_transition_patch->mod->name, klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); + if (klp_transition_patch->replace && klp_target_state == KLP_PATCHED) + klp_discard_replaced_patches(klp_transition_patch); + if (klp_target_state == KLP_UNPATCHED) { /* * All tasks have transitioned to KLP_UNPATCHED so we can now -- cgit v1.2.3 From afb77422819ff60612e9b7d36461b9b2bc8e038e Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 10 Dec 2018 16:50:19 +0000 Subject: bus: fsl-mc: automatically add a device_link on fsl_mc_[portal,object]_allocate Allocatable devices can be acquired by drivers on the fsl-mc bus using the fsl_mc_portal_allocate or fsl_mc_object_allocate functions. Add a device link between the consumer device and the supplier device so that proper resource management is achieved. Also, adding a link between these devices ensures that a proper unbind order is respected (ie before the supplier device is unbound from its respective driver all consumer devices will be notified and unbound first). Signed-off-by: Ioana Ciornei Reviewed-by: Laurentiu Tudor Signed-off-by: Li Yang --- drivers/bus/fsl-mc/fsl-mc-allocator.c | 11 +++++++++++ drivers/bus/fsl-mc/mc-io.c | 13 +++++++++++++ include/linux/fsl/mc.h | 1 + 3 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/drivers/bus/fsl-mc/fsl-mc-allocator.c b/drivers/bus/fsl-mc/fsl-mc-allocator.c index e906ecfe23dd..8ad77246f322 100644 --- a/drivers/bus/fsl-mc/fsl-mc-allocator.c +++ b/drivers/bus/fsl-mc/fsl-mc-allocator.c @@ -295,6 +295,14 @@ int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, if (!mc_adev) goto error; + mc_adev->consumer_link = device_link_add(&mc_dev->dev, + &mc_adev->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!mc_adev->consumer_link) { + error = -EINVAL; + goto error; + } + *new_mc_adev = mc_adev; return 0; error: @@ -321,6 +329,9 @@ void fsl_mc_object_free(struct fsl_mc_device *mc_adev) return; fsl_mc_resource_free(resource); + + device_link_del(mc_adev->consumer_link); + mc_adev->consumer_link = NULL; } EXPORT_SYMBOL_GPL(fsl_mc_object_free); diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c index 7226cfc49b6f..3ae574a58cce 100644 --- a/drivers/bus/fsl-mc/mc-io.c +++ b/drivers/bus/fsl-mc/mc-io.c @@ -209,9 +209,19 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, if (error < 0) goto error_cleanup_resource; + dpmcp_dev->consumer_link = device_link_add(&mc_dev->dev, + &dpmcp_dev->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!dpmcp_dev->consumer_link) { + error = -EINVAL; + goto error_cleanup_mc_io; + } + *new_mc_io = mc_io; return 0; +error_cleanup_mc_io: + fsl_destroy_mc_io(mc_io); error_cleanup_resource: fsl_mc_resource_free(resource); return error; @@ -244,6 +254,9 @@ void fsl_mc_portal_free(struct fsl_mc_io *mc_io) fsl_destroy_mc_io(mc_io); fsl_mc_resource_free(resource); + + device_link_del(dpmcp_dev->consumer_link); + dpmcp_dev->consumer_link = NULL; } EXPORT_SYMBOL_GPL(fsl_mc_portal_free); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 741f567253ef..975553a9f75d 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -193,6 +193,7 @@ struct fsl_mc_device { struct resource *regions; struct fsl_mc_device_irq **irqs; struct fsl_mc_resource *resource; + struct device_link *consumer_link; }; #define to_fsl_mc_device(_dev) \ -- cgit v1.2.3 From 98a455d91e7116ca417bc37da6aa2dd633206a6f Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Tue, 18 Dec 2018 14:02:45 +0800 Subject: ACPI / tables: table override from built-in initrd In some scenario, we need to build initrd with kernel in a single image. This can simplify system deployment process by downloading the whole system once, such as in IC verification. This patch adds support to override ACPI tables from built-in initrd. Signed-off-by: Shunyong Yang [ rjw: Minor cleanups ] Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/initrd_table_override.txt | 4 ++++ drivers/acpi/Kconfig | 10 ++++++++++ drivers/acpi/tables.c | 12 ++++++++++-- include/linux/initrd.h | 3 +++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt index eb651a6aa285..30437a6db373 100644 --- a/Documentation/acpi/initrd_table_override.txt +++ b/Documentation/acpi/initrd_table_override.txt @@ -14,6 +14,10 @@ upgrade the ACPI execution environment that is defined by the ACPI tables via upgrading the ACPI tables provided by the BIOS with an instrumented, modified, more recent version one, or installing brand new ACPI tables. +When building initrd with kernel in a single image, option +ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this +feature to work. + For a full list of ACPI tables that can be upgraded/installed, take a look at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/tables.c. diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 90ff0a47c12e..4e015c77e48e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -357,6 +357,16 @@ config ACPI_TABLE_UPGRADE initrd, therefore it's safe to say Y. See Documentation/acpi/initrd_table_override.txt for details +config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD + bool "Override ACPI tables from built-in initrd" + depends on ACPI_TABLE_UPGRADE + depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION="" + help + This option provides functionality to override arbitrary ACPI tables + from built-in uncompressed initrd. + + See Documentation/acpi/initrd_table_override.txt for details + config ACPI_DEBUG bool "Debug Statements" help diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 48eabb6c2d4f..8fccbe49612a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -473,14 +473,22 @@ static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES); void __init acpi_table_upgrade(void) { - void *data = (void *)initrd_start; - size_t size = initrd_end - initrd_start; + void *data; + size_t size; int sig, no, table_nr = 0, total_offset = 0; long offset = 0; struct acpi_table_header *table; char cpio_path[32] = "kernel/firmware/acpi/"; struct cpio_data file; + if (IS_ENABLED(CONFIG_ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD)) { + data = __initramfs_start; + size = __initramfs_size; + } else { + data = (void *)initrd_start; + size = initrd_end - initrd_start; + } + if (data == NULL || size == 0) return; diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 14beaff9b445..d77fe34fb00a 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -25,3 +25,6 @@ extern phys_addr_t phys_initrd_start; extern unsigned long phys_initrd_size; extern unsigned int real_root_dev; + +extern char __initramfs_start[]; +extern unsigned long __initramfs_size; -- cgit v1.2.3 From 73f5a82bb3c9fce550da4a74a32b8cb064b50663 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Jan 2019 15:57:04 +0200 Subject: RDMA/mad: Reduce MAD scope to mlx5_ib only Management Datagram Interface (MAD) is applicable only when physical port is Infiniband. It makes MAD command logic to be completely unrelated to eth/core parts of mlx5. Signed-off-by: Leon Romanovsky Acked-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 37 ++++++++++++ drivers/infiniband/hw/mlx5/cmd.h | 2 + drivers/infiniband/hw/mlx5/mad.c | 11 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 75 ------------------------ include/linux/mlx5/driver.h | 2 - 7 files changed, 47 insertions(+), 85 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..efe383c0ac86 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1038,9 +1038,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..0257731e6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..4e444863054a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -897,8 +897,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 16118794ede91aac1a73abe15de22d3de9d2b775 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Jan 2019 14:33:17 +0100 Subject: posix-cpu-timers: Remove private interval storage Posix CPU timers store the interval in private storage for historical reasons (it_interval used to be a non scalar representation on 32bit systems). This is gone and there is no reason for duplicated storage anymore. Use it_interval everywhere. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Cc: "H.J. Lu" Link: https://lkml.kernel.org/r/20190111133500.945255655@linutronix.de --- include/linux/posix-timers.h | 2 +- kernel/time/posix-cpu-timers.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e96581ca7c9d..b20798fc5191 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -12,7 +12,7 @@ struct siginfo; struct cpu_timer_list { struct list_head entry; - u64 expires, incr; + u64 expires; struct task_struct *task; int firing; }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80f955210861..0a426f4e3125 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -67,13 +67,13 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) int i; u64 delta, incr; - if (timer->it.cpu.incr == 0) + if (!timer->it_interval) return; if (now < timer->it.cpu.expires) return; - incr = timer->it.cpu.incr; + incr = timer->it_interval; delta = now + incr - timer->it.cpu.expires; /* Don't use (incr*2 < delta), incr*2 might overflow. */ @@ -520,7 +520,7 @@ static void cpu_timer_fire(struct k_itimer *timer) */ wake_up_process(timer->it_process); timer->it.cpu.expires = 0; - } else if (timer->it.cpu.incr == 0) { + } else if (!timer->it_interval) { /* * One-shot timer. Clear it as soon as it's fired. */ @@ -606,7 +606,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, */ ret = 0; - old_incr = timer->it.cpu.incr; + old_incr = timer->it_interval; old_expires = timer->it.cpu.expires; if (unlikely(timer->it.cpu.firing)) { timer->it.cpu.firing = -1; @@ -684,8 +684,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * Install the new reload setting, and * set up the signal and overrun bookkeeping. */ - timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); - timer->it_interval = ns_to_ktime(timer->it.cpu.incr); + timer->it_interval = timespec64_to_ktime(new->it_interval); /* * This acts as a modification timestamp for the timer, @@ -724,7 +723,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp /* * Easy part: convert the reload time. */ - itp->it_interval = ns_to_timespec64(timer->it.cpu.incr); + itp->it_interval = ktime_to_timespec64(timer->it_interval); if (!timer->it.cpu.expires) return; -- cgit v1.2.3 From 8a62ffe2753a845272f4f2100b5fca0b6053ff6f Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 21 Dec 2018 11:33:54 +0100 Subject: PM-runtime: Add new interface to get accounted time Some drivers (like i915/drm) needs to get the accounted suspended time. pm_runtime_suspended_time() will return the suspended accounted time in ns unit. Reviewed-by: Ulf Hansson Signed-off-by: Vincent Guittot Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 15 +++++++++++++++ include/linux/pm_runtime.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..a453090c9449 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -88,6 +88,21 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status) dev->power.runtime_status = status; } +u64 pm_runtime_suspended_time(struct device *dev) +{ + unsigned long flags, time; + + spin_lock_irqsave(&dev->power.lock, flags); + + update_pm_runtime_accounting(dev); + time = dev->power.suspended_jiffies; + + spin_unlock_irqrestore(&dev->power.lock, flags); + + return jiffies_to_nsecs(time); +} +EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); + /** * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. * @dev: Device to handle. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 54af4eef169f..a370006921c0 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -113,6 +113,8 @@ static inline bool pm_runtime_is_irq_safe(struct device *dev) return dev->power.irq_safe; } +extern u64 pm_runtime_suspended_time(struct device *dev); + #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } -- cgit v1.2.3 From b33a02aadcc6330a61e511240b634dc11112e65e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Jan 2019 17:24:55 +0200 Subject: i2c: acpi: Move I2C bits from acpi.h to i2c.h As discussed previously the best location for certain bus related bits, e.g. I2C, is its own realm of the headers. In order to uncontaminate acpi.h move the I2C bits to i2c.h. There is no functional change intended. Link: https://lkml.org/lkml/2018/11/28/744 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/acpi.h | 11 ----------- include/linux/i2c.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 87715f20b69a..13f5cb2c4763 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1061,17 +1061,6 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } #endif -#if defined(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) -bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, - struct acpi_resource_i2c_serialbus **i2c); -#else -static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, - struct acpi_resource_i2c_serialbus **i2c) -{ - return false; -} -#endif - /* Device properties */ #ifdef CONFIG_ACPI diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cba59d66c00d..1f45331924d6 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -967,11 +967,21 @@ static inline int of_i2c_get_board_info(struct device *dev, #endif /* CONFIG_OF */ +struct acpi_resource; +struct acpi_resource_i2c_serialbus; + #if IS_ENABLED(CONFIG_ACPI) +bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c); u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info); #else +static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c) +{ + return false; +} static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; -- cgit v1.2.3 From 063755ab1d1c1127adc09703185967862584935b Mon Sep 17 00:00:00 2001 From: Philippe Schenker Date: Fri, 21 Dec 2018 14:46:31 +0100 Subject: mfd: stmpe: Move ADC related defines to MFD header Move defines that are ADC related to the header of the overlying MFD, so they can be used from multiple sub-devices. Signed-off-by: Philippe Schenker Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/input/touchscreen/stmpe-ts.c | 34 +++++++++++++--------------------- include/linux/mfd/stmpe.h | 11 +++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index 2a78e27b4495..c5d9006588a2 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -49,17 +49,6 @@ #define STMPE_IRQ_TOUCH_DET 0 -#define SAMPLE_TIME(x) ((x & 0xf) << 4) -#define MOD_12B(x) ((x & 0x1) << 3) -#define REF_SEL(x) ((x & 0x1) << 1) -#define ADC_FREQ(x) (x & 0x3) -#define AVE_CTRL(x) ((x & 0x3) << 6) -#define DET_DELAY(x) ((x & 0x7) << 3) -#define SETTLING(x) (x & 0x7) -#define FRACTION_Z(x) (x & 0x7) -#define I_DRIVE(x) (x & 0x1) -#define OP_MODE(x) ((x & 0x7) << 1) - #define STMPE_TS_NAME "stmpe-ts" #define XY_MASK 0xfff @@ -213,9 +202,10 @@ static int stmpe_init_hw(struct stmpe_touch *ts) return ret; } - adc_ctrl1 = SAMPLE_TIME(ts->sample_time) | MOD_12B(ts->mod_12b) | - REF_SEL(ts->ref_sel); - adc_ctrl1_mask = SAMPLE_TIME(0xff) | MOD_12B(0xff) | REF_SEL(0xff); + adc_ctrl1 = STMPE_SAMPLE_TIME(ts->sample_time) | + STMPE_MOD_12B(ts->mod_12b) | STMPE_REF_SEL(ts->ref_sel); + adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) | + STMPE_REF_SEL(0xff); ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL1, adc_ctrl1_mask, adc_ctrl1); @@ -225,15 +215,17 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL2, - ADC_FREQ(0xff), ADC_FREQ(ts->adc_freq)); + STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(ts->adc_freq)); if (ret) { dev_err(dev, "Could not setup ADC\n"); return ret; } - tsc_cfg = AVE_CTRL(ts->ave_ctrl) | DET_DELAY(ts->touch_det_delay) | - SETTLING(ts->settling); - tsc_cfg_mask = AVE_CTRL(0xff) | DET_DELAY(0xff) | SETTLING(0xff); + tsc_cfg = STMPE_AVE_CTRL(ts->ave_ctrl) | + STMPE_DET_DELAY(ts->touch_det_delay) | + STMPE_SETTLING(ts->settling); + tsc_cfg_mask = STMPE_AVE_CTRL(0xff) | STMPE_DET_DELAY(0xff) | + STMPE_SETTLING(0xff); ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CFG, tsc_cfg_mask, tsc_cfg); if (ret) { @@ -242,14 +234,14 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_FRACTION_Z, - FRACTION_Z(0xff), FRACTION_Z(ts->fraction_z)); + STMPE_FRACTION_Z(0xff), STMPE_FRACTION_Z(ts->fraction_z)); if (ret) { dev_err(dev, "Could not config touch\n"); return ret; } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_I_DRIVE, - I_DRIVE(0xff), I_DRIVE(ts->i_drive)); + STMPE_I_DRIVE(0xff), STMPE_I_DRIVE(ts->i_drive)); if (ret) { dev_err(dev, "Could not config touch\n"); return ret; @@ -263,7 +255,7 @@ static int stmpe_init_hw(struct stmpe_touch *ts) } ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CTRL, - OP_MODE(0xff), OP_MODE(OP_MOD_XYZ)); + STMPE_OP_MODE(0xff), STMPE_OP_MODE(OP_MOD_XYZ)); if (ret) { dev_err(dev, "Could not set mode\n"); return ret; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 4a827af17e59..c0353f6431f9 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -10,6 +10,17 @@ #include +#define STMPE_SAMPLE_TIME(x) ((x & 0xf) << 4) +#define STMPE_MOD_12B(x) ((x & 0x1) << 3) +#define STMPE_REF_SEL(x) ((x & 0x1) << 1) +#define STMPE_ADC_FREQ(x) (x & 0x3) +#define STMPE_AVE_CTRL(x) ((x & 0x3) << 6) +#define STMPE_DET_DELAY(x) ((x & 0x7) << 3) +#define STMPE_SETTLING(x) (x & 0x7) +#define STMPE_FRACTION_Z(x) (x & 0x7) +#define STMPE_I_DRIVE(x) (x & 0x1) +#define STMPE_OP_MODE(x) ((x & 0x7) << 1) + struct device; struct regulator; -- cgit v1.2.3 From 6377cfa3b857ced301f2079ac97de6c19057ab65 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 21 Dec 2018 14:46:32 +0100 Subject: mfd: stmpe: Preparations for STMPE ADC driver This prepares the MFD for the STMPE ADC driver. This commit introduces devicetree settings that are used by the ADC and adds an init function. Common ADC settings that are shared with the touchscreen driver can now reside in the overlying MFD. Signed-off-by: Stefan Agner Signed-off-by: Max Krummenacher Signed-off-by: Philippe Schenker Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 3 ++- drivers/mfd/stmpe.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/stmpe.h | 10 +++++++ 3 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8c5dfdce4326..bba159e8eaa4 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1204,7 +1204,7 @@ config MFD_STMPE Currently supported devices are: - STMPE811: GPIO, Touchscreen + STMPE811: GPIO, Touchscreen, ADC STMPE1601: GPIO, Keypad STMPE1801: GPIO, Keypad STMPE2401: GPIO, Keypad @@ -1217,6 +1217,7 @@ config MFD_STMPE GPIO: stmpe-gpio Keypad: stmpe-keypad Touchscreen: stmpe-ts + ADC: stmpe-adc menu "STMicroelectronics STMPE Interface Drivers" depends on MFD_STMPE diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 566caca4efd8..f582531a8f3e 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -463,6 +463,28 @@ static const struct mfd_cell stmpe_ts_cell = { .num_resources = ARRAY_SIZE(stmpe_ts_resources), }; +/* + * ADC (STMPE811) + */ + +static struct resource stmpe_adc_resources[] = { + { + .name = "STMPE_TEMP_SENS", + .flags = IORESOURCE_IRQ, + }, + { + .name = "STMPE_ADC", + .flags = IORESOURCE_IRQ, + }, +}; + +static const struct mfd_cell stmpe_adc_cell = { + .name = "stmpe-adc", + .of_compatible = "st,stmpe-adc", + .resources = stmpe_adc_resources, + .num_resources = ARRAY_SIZE(stmpe_adc_resources), +}; + /* * STMPE811 or STMPE610 */ @@ -497,6 +519,11 @@ static struct stmpe_variant_block stmpe811_blocks[] = { .irq = STMPE811_IRQ_TOUCH_DET, .block = STMPE_BLOCK_TOUCHSCREEN, }, + { + .cell = &stmpe_adc_cell, + .irq = STMPE811_IRQ_TEMP_SENS, + .block = STMPE_BLOCK_ADC, + }, }; static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks, @@ -517,6 +544,35 @@ static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks, enable ? 0 : mask); } +int stmpe811_adc_common_init(struct stmpe *stmpe) +{ + int ret; + u8 adc_ctrl1, adc_ctrl1_mask; + + adc_ctrl1 = STMPE_SAMPLE_TIME(stmpe->sample_time) | + STMPE_MOD_12B(stmpe->mod_12b) | + STMPE_REF_SEL(stmpe->ref_sel); + adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) | + STMPE_REF_SEL(0xff); + + ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL1, + adc_ctrl1_mask, adc_ctrl1); + if (ret) { + dev_err(stmpe->dev, "Could not setup ADC\n"); + return ret; + } + + ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL2, + STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(stmpe->adc_freq)); + if (ret) { + dev_err(stmpe->dev, "Could not setup ADC\n"); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(stmpe811_adc_common_init); + static int stmpe811_get_altfunc(struct stmpe *stmpe, enum stmpe_block block) { /* 0 for touchscreen, 1 for GPIO */ @@ -1325,6 +1381,7 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum) struct device_node *np = ci->dev->of_node; struct stmpe *stmpe; int ret; + u32 val; pdata = devm_kzalloc(ci->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) @@ -1342,6 +1399,15 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum) mutex_init(&stmpe->irq_lock); mutex_init(&stmpe->lock); + if (!of_property_read_u32(np, "st,sample-time", &val)) + stmpe->sample_time = val; + if (!of_property_read_u32(np, "st,mod-12b", &val)) + stmpe->mod_12b = val; + if (!of_property_read_u32(np, "st,ref-sel", &val)) + stmpe->ref_sel = val; + if (!of_property_read_u32(np, "st,adc-freq", &val)) + stmpe->adc_freq = val; + stmpe->dev = ci->dev; stmpe->client = ci->client; stmpe->pdata = pdata; @@ -1433,6 +1499,8 @@ int stmpe_remove(struct stmpe *stmpe) if (!IS_ERR(stmpe->vcc)) regulator_disable(stmpe->vcc); + __stmpe_disable(stmpe, STMPE_BLOCK_ADC); + mfd_remove_devices(stmpe->dev); return 0; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index c0353f6431f9..07f55aac9390 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -21,6 +21,9 @@ #define STMPE_I_DRIVE(x) (x & 0x1) #define STMPE_OP_MODE(x) ((x & 0x7) << 1) +#define STMPE811_REG_ADC_CTRL1 0x20 +#define STMPE811_REG_ADC_CTRL2 0x21 + struct device; struct regulator; @@ -134,6 +137,12 @@ struct stmpe { u8 ier[2]; u8 oldier[2]; struct stmpe_platform_data *pdata; + + /* For devices that use an ADC */ + u8 sample_time; + u8 mod_12b; + u8 ref_sel; + u8 adc_freq; }; extern int stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 data); @@ -147,6 +156,7 @@ extern int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins, enum stmpe_block block); extern int stmpe_enable(struct stmpe *stmpe, unsigned int blocks); extern int stmpe_disable(struct stmpe *stmpe, unsigned int blocks); +extern int stmpe811_adc_common_init(struct stmpe *stmpe); #define STMPE_GPIO_NOREQ_811_TOUCH (0xf0) -- cgit v1.2.3 From 1d7ae53b152dbc5ba0a4f6a83ecc42ac66f52d11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:47 +0300 Subject: iommu: Introduce iotlb_sync_map callback Introduce iotlb_sync_map() callback that is invoked in the end of iommu_map(). This new callback allows IOMMU drivers to avoid syncing after mapping of each contiguous chunk and sync only when the whole mapping is completed, optimizing performance of the mapping operation. Signed-off-by: Dmitry Osipenko Reviewed-by: Robin Murphy Reviewed-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3ed4db334341..ed0e63f2cd9b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1585,13 +1585,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { + const struct iommu_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(domain->ops->map == NULL || + if (unlikely(ops->map == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1620,7 +1621,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); - ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + ret = ops->map(domain, iova, paddr, pgsize, prot); if (ret) break; @@ -1629,6 +1630,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e90da6b6f3d1..477ef47c357c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -201,6 +201,7 @@ struct iommu_ops { void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_range_add)(struct iommu_domain *domain, unsigned long iova, size_t size); + void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); -- cgit v1.2.3 From 51908d2e9b7c7730608a19f24fc8718af745bb2f Mon Sep 17 00:00:00 2001 From: Pascal PAILLET-LME Date: Mon, 14 Jan 2019 10:05:16 +0000 Subject: mfd: stpmic1: Add STPMIC1 driver STPMIC1 is a PMIC from STMicroelectronics. The STPMIC1 integrates 10 regulators, 3 power switches, a watchdog and an input for a power on key. Signed-off-by: Pascal Paillet Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 16 ++++ drivers/mfd/Makefile | 1 + drivers/mfd/stpmic1.c | 213 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/stpmic1.h | 212 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 442 insertions(+) create mode 100644 drivers/mfd/stpmic1.c create mode 100644 include/linux/mfd/stpmic1.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8c5dfdce4326..0761cb83d174 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1871,6 +1871,22 @@ config MFD_STM32_TIMERS for PWM and IIO Timer. This driver allow to share the registers between the others drivers. +config MFD_STPMIC1 + tristate "Support for STPMIC1 PMIC" + depends on (I2C=y && OF) + select REGMAP_I2C + select REGMAP_IRQ + select MFD_CORE + help + Support for ST Microelectronics STPMIC1 PMIC. STPMIC1 has power on + key, watchdog and regulator functionalities which are supported via + the relevant subsystems. This driver provides core support for the + STPMIC1. In order to use the actual functionaltiy of the device other + drivers must be enabled. + + To compile this driver as a module, choose M here: the + module will be called stpmic1. + menu "Multimedia Capabilities Port drivers" depends on ARCH_SA1100 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 12980a4ad460..a62fb0112d9f 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -233,6 +233,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI) += intel_soc_pmic_chtdc_ti.o obj-$(CONFIG_MFD_MT6397) += mt6397-core.o obj-$(CONFIG_MFD_ALTERA_A10SR) += altera-a10sr.o +obj-$(CONFIG_MFD_STPMIC1) += stpmic1.o obj-$(CONFIG_MFD_SUN4I_GPADC) += sun4i-gpadc.o obj-$(CONFIG_MFD_STM32_LPTIMER) += stm32-lptimer.o diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c new file mode 100644 index 000000000000..7dfbe8906cb8 --- /dev/null +++ b/drivers/mfd/stpmic1.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) STMicroelectronics 2018 +// Author: Pascal Paillet + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define STPMIC1_MAIN_IRQ 0 + +static const struct regmap_range stpmic1_readable_ranges[] = { + regmap_reg_range(TURN_ON_SR, VERSION_SR), + regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR), + regmap_reg_range(BST_SW_CR, BST_SW_CR), + regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4), + regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4), + regmap_reg_range(INT_MASK_R1, INT_MASK_R4), + regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4), + regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4), + regmap_reg_range(INT_SRC_R1, INT_SRC_R1), +}; + +static const struct regmap_range stpmic1_writeable_ranges[] = { + regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR), + regmap_reg_range(BST_SW_CR, BST_SW_CR), + regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4), + regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4), + regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4), +}; + +static const struct regmap_range stpmic1_volatile_ranges[] = { + regmap_reg_range(TURN_ON_SR, VERSION_SR), + regmap_reg_range(WCHDG_CR, WCHDG_CR), + regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4), + regmap_reg_range(INT_SRC_R1, INT_SRC_R4), +}; + +static const struct regmap_access_table stpmic1_readable_table = { + .yes_ranges = stpmic1_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_readable_ranges), +}; + +static const struct regmap_access_table stpmic1_writeable_table = { + .yes_ranges = stpmic1_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_writeable_ranges), +}; + +static const struct regmap_access_table stpmic1_volatile_table = { + .yes_ranges = stpmic1_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(stpmic1_volatile_ranges), +}; + +const struct regmap_config stpmic1_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .max_register = PMIC_MAX_REGISTER_ADDRESS, + .rd_table = &stpmic1_readable_table, + .wr_table = &stpmic1_writeable_table, + .volatile_table = &stpmic1_volatile_table, +}; + +static const struct regmap_irq stpmic1_irqs[] = { + REGMAP_IRQ_REG(IT_PONKEY_F, 0, 0x01), + REGMAP_IRQ_REG(IT_PONKEY_R, 0, 0x02), + REGMAP_IRQ_REG(IT_WAKEUP_F, 0, 0x04), + REGMAP_IRQ_REG(IT_WAKEUP_R, 0, 0x08), + REGMAP_IRQ_REG(IT_VBUS_OTG_F, 0, 0x10), + REGMAP_IRQ_REG(IT_VBUS_OTG_R, 0, 0x20), + REGMAP_IRQ_REG(IT_SWOUT_F, 0, 0x40), + REGMAP_IRQ_REG(IT_SWOUT_R, 0, 0x80), + + REGMAP_IRQ_REG(IT_CURLIM_BUCK1, 1, 0x01), + REGMAP_IRQ_REG(IT_CURLIM_BUCK2, 1, 0x02), + REGMAP_IRQ_REG(IT_CURLIM_BUCK3, 1, 0x04), + REGMAP_IRQ_REG(IT_CURLIM_BUCK4, 1, 0x08), + REGMAP_IRQ_REG(IT_OCP_OTG, 1, 0x10), + REGMAP_IRQ_REG(IT_OCP_SWOUT, 1, 0x20), + REGMAP_IRQ_REG(IT_OCP_BOOST, 1, 0x40), + REGMAP_IRQ_REG(IT_OVP_BOOST, 1, 0x80), + + REGMAP_IRQ_REG(IT_CURLIM_LDO1, 2, 0x01), + REGMAP_IRQ_REG(IT_CURLIM_LDO2, 2, 0x02), + REGMAP_IRQ_REG(IT_CURLIM_LDO3, 2, 0x04), + REGMAP_IRQ_REG(IT_CURLIM_LDO4, 2, 0x08), + REGMAP_IRQ_REG(IT_CURLIM_LDO5, 2, 0x10), + REGMAP_IRQ_REG(IT_CURLIM_LDO6, 2, 0x20), + REGMAP_IRQ_REG(IT_SHORT_SWOTG, 2, 0x40), + REGMAP_IRQ_REG(IT_SHORT_SWOUT, 2, 0x80), + + REGMAP_IRQ_REG(IT_TWARN_F, 3, 0x01), + REGMAP_IRQ_REG(IT_TWARN_R, 3, 0x02), + REGMAP_IRQ_REG(IT_VINLOW_F, 3, 0x04), + REGMAP_IRQ_REG(IT_VINLOW_R, 3, 0x08), + REGMAP_IRQ_REG(IT_SWIN_F, 3, 0x40), + REGMAP_IRQ_REG(IT_SWIN_R, 3, 0x80), +}; + +static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { + .name = "pmic_irq", + .status_base = INT_PENDING_R1, + .mask_base = INT_CLEAR_MASK_R1, + .unmask_base = INT_SET_MASK_R1, + .ack_base = INT_CLEAR_R1, + .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS, + .irqs = stpmic1_irqs, + .num_irqs = ARRAY_SIZE(stpmic1_irqs), +}; + +static int stpmic1_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct stpmic1 *ddata; + struct device *dev = &i2c->dev; + int ret; + struct device_node *np = dev->of_node; + u32 reg; + + ddata = devm_kzalloc(dev, sizeof(struct stpmic1), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + i2c_set_clientdata(i2c, ddata); + ddata->dev = dev; + + ddata->regmap = devm_regmap_init_i2c(i2c, &stpmic1_regmap_config); + if (IS_ERR(ddata->regmap)) + return PTR_ERR(ddata->regmap); + + ddata->irq = of_irq_get(np, STPMIC1_MAIN_IRQ); + if (ddata->irq < 0) { + dev_err(dev, "Failed to get main IRQ: %d\n", ddata->irq); + return ddata->irq; + } + + ret = regmap_read(ddata->regmap, VERSION_SR, ®); + if (ret) { + dev_err(dev, "Unable to read PMIC version\n"); + return ret; + } + dev_info(dev, "PMIC Chip Version: 0x%x\n", reg); + + /* Initialize PMIC IRQ Chip & associated IRQ domains */ + ret = devm_regmap_add_irq_chip(dev, ddata->regmap, ddata->irq, + IRQF_ONESHOT | IRQF_SHARED, + 0, &stpmic1_regmap_irq_chip, + &ddata->irq_data); + if (ret) { + dev_err(dev, "IRQ Chip registration failed: %d\n", ret); + return ret; + } + + return devm_of_platform_populate(dev); +} + +#ifdef CONFIG_PM_SLEEP +static int stpmic1_suspend(struct device *dev) +{ + struct i2c_client *i2c = to_i2c_client(dev); + struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c); + + disable_irq(pmic_dev->irq); + + return 0; +} + +static int stpmic1_resume(struct device *dev) +{ + struct i2c_client *i2c = to_i2c_client(dev); + struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c); + int ret; + + ret = regcache_sync(pmic_dev->regmap); + if (ret) + return ret; + + enable_irq(pmic_dev->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(stpmic1_pm, stpmic1_suspend, stpmic1_resume); + +static const struct of_device_id stpmic1_of_match[] = { + { .compatible = "st,stpmic1", }, + {}, +}; +MODULE_DEVICE_TABLE(of, stpmic1_of_match); + +static struct i2c_driver stpmic1_driver = { + .driver = { + .name = "stpmic1", + .of_match_table = of_match_ptr(stpmic1_of_match), + .pm = &stpmic1_pm, + }, + .probe = stpmic1_probe, +}; + +module_i2c_driver(stpmic1_driver); + +MODULE_DESCRIPTION("STPMIC1 PMIC Driver"); +MODULE_AUTHOR("Pascal Paillet "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/stpmic1.h b/include/linux/mfd/stpmic1.h new file mode 100644 index 000000000000..fa3f99f7e9a1 --- /dev/null +++ b/include/linux/mfd/stpmic1.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) STMicroelectronics 2018 - All Rights Reserved + * Author: Philippe Peurichard , + * Pascal Paillet for STMicroelectronics. + */ + +#ifndef __LINUX_MFD_STPMIC1_H +#define __LINUX_MFD_STPMIC1_H + +#define TURN_ON_SR 0x1 +#define TURN_OFF_SR 0x2 +#define ICC_LDO_TURN_OFF_SR 0x3 +#define ICC_BUCK_TURN_OFF_SR 0x4 +#define RREQ_STATE_SR 0x5 +#define VERSION_SR 0x6 + +#define SWOFF_PWRCTRL_CR 0x10 +#define PADS_PULL_CR 0x11 +#define BUCKS_PD_CR 0x12 +#define LDO14_PD_CR 0x13 +#define LDO56_VREF_PD_CR 0x14 +#define VBUS_DET_VIN_CR 0x15 +#define PKEY_TURNOFF_CR 0x16 +#define BUCKS_MASK_RANK_CR 0x17 +#define BUCKS_MASK_RESET_CR 0x18 +#define LDOS_MASK_RANK_CR 0x19 +#define LDOS_MASK_RESET_CR 0x1A +#define WCHDG_CR 0x1B +#define WCHDG_TIMER_CR 0x1C +#define BUCKS_ICCTO_CR 0x1D +#define LDOS_ICCTO_CR 0x1E + +#define BUCK1_ACTIVE_CR 0x20 +#define BUCK2_ACTIVE_CR 0x21 +#define BUCK3_ACTIVE_CR 0x22 +#define BUCK4_ACTIVE_CR 0x23 +#define VREF_DDR_ACTIVE_CR 0x24 +#define LDO1_ACTIVE_CR 0x25 +#define LDO2_ACTIVE_CR 0x26 +#define LDO3_ACTIVE_CR 0x27 +#define LDO4_ACTIVE_CR 0x28 +#define LDO5_ACTIVE_CR 0x29 +#define LDO6_ACTIVE_CR 0x2A + +#define BUCK1_STDBY_CR 0x30 +#define BUCK2_STDBY_CR 0x31 +#define BUCK3_STDBY_CR 0x32 +#define BUCK4_STDBY_CR 0x33 +#define VREF_DDR_STDBY_CR 0x34 +#define LDO1_STDBY_CR 0x35 +#define LDO2_STDBY_CR 0x36 +#define LDO3_STDBY_CR 0x37 +#define LDO4_STDBY_CR 0x38 +#define LDO5_STDBY_CR 0x39 +#define LDO6_STDBY_CR 0x3A + +#define BST_SW_CR 0x40 + +#define INT_PENDING_R1 0x50 +#define INT_PENDING_R2 0x51 +#define INT_PENDING_R3 0x52 +#define INT_PENDING_R4 0x53 + +#define INT_DBG_LATCH_R1 0x60 +#define INT_DBG_LATCH_R2 0x61 +#define INT_DBG_LATCH_R3 0x62 +#define INT_DBG_LATCH_R4 0x63 + +#define INT_CLEAR_R1 0x70 +#define INT_CLEAR_R2 0x71 +#define INT_CLEAR_R3 0x72 +#define INT_CLEAR_R4 0x73 + +#define INT_MASK_R1 0x80 +#define INT_MASK_R2 0x81 +#define INT_MASK_R3 0x82 +#define INT_MASK_R4 0x83 + +#define INT_SET_MASK_R1 0x90 +#define INT_SET_MASK_R2 0x91 +#define INT_SET_MASK_R3 0x92 +#define INT_SET_MASK_R4 0x93 + +#define INT_CLEAR_MASK_R1 0xA0 +#define INT_CLEAR_MASK_R2 0xA1 +#define INT_CLEAR_MASK_R3 0xA2 +#define INT_CLEAR_MASK_R4 0xA3 + +#define INT_SRC_R1 0xB0 +#define INT_SRC_R2 0xB1 +#define INT_SRC_R3 0xB2 +#define INT_SRC_R4 0xB3 + +#define PMIC_MAX_REGISTER_ADDRESS INT_SRC_R4 + +#define STPMIC1_PMIC_NUM_IRQ_REGS 4 + +#define TURN_OFF_SR_ICC_EVENT 0x08 + +#define LDO_VOLTAGE_MASK GENMASK(6, 2) +#define BUCK_VOLTAGE_MASK GENMASK(7, 2) +#define LDO_BUCK_VOLTAGE_SHIFT 2 + +#define LDO_ENABLE_MASK BIT(0) +#define BUCK_ENABLE_MASK BIT(0) + +#define BUCK_HPLP_ENABLE_MASK BIT(1) +#define BUCK_HPLP_SHIFT 1 + +#define STDBY_ENABLE_MASK BIT(0) + +#define BUCKS_PD_CR_REG_MASK GENMASK(7, 0) +#define BUCK_MASK_RANK_REGISTER_MASK GENMASK(3, 0) +#define BUCK_MASK_RESET_REGISTER_MASK GENMASK(3, 0) +#define LDO1234_PULL_DOWN_REGISTER_MASK GENMASK(7, 0) +#define LDO56_VREF_PD_CR_REG_MASK GENMASK(5, 0) +#define LDO_MASK_RANK_REGISTER_MASK GENMASK(5, 0) +#define LDO_MASK_RESET_REGISTER_MASK GENMASK(5, 0) + +#define BUCK1_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK1_PULL_DOWN_MASK BIT(0) +#define BUCK2_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK2_PULL_DOWN_MASK BIT(2) +#define BUCK3_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK3_PULL_DOWN_MASK BIT(4) +#define BUCK4_PULL_DOWN_REG BUCKS_PD_CR +#define BUCK4_PULL_DOWN_MASK BIT(6) + +#define LDO1_PULL_DOWN_REG LDO14_PD_CR +#define LDO1_PULL_DOWN_MASK BIT(0) +#define LDO2_PULL_DOWN_REG LDO14_PD_CR +#define LDO2_PULL_DOWN_MASK BIT(2) +#define LDO3_PULL_DOWN_REG LDO14_PD_CR +#define LDO3_PULL_DOWN_MASK BIT(4) +#define LDO4_PULL_DOWN_REG LDO14_PD_CR +#define LDO4_PULL_DOWN_MASK BIT(6) +#define LDO5_PULL_DOWN_REG LDO56_VREF_PD_CR +#define LDO5_PULL_DOWN_MASK BIT(0) +#define LDO6_PULL_DOWN_REG LDO56_VREF_PD_CR +#define LDO6_PULL_DOWN_MASK BIT(2) +#define VREF_DDR_PULL_DOWN_REG LDO56_VREF_PD_CR +#define VREF_DDR_PULL_DOWN_MASK BIT(4) + +#define BUCKS_ICCTO_CR_REG_MASK GENMASK(6, 0) +#define LDOS_ICCTO_CR_REG_MASK GENMASK(5, 0) + +#define LDO_BYPASS_MASK BIT(7) + +/* Main PMIC Control Register + * SWOFF_PWRCTRL_CR + * Address : 0x10 + */ +#define ICC_EVENT_ENABLED BIT(4) +#define PWRCTRL_POLARITY_HIGH BIT(3) +#define PWRCTRL_PIN_VALID BIT(2) +#define RESTART_REQUEST_ENABLED BIT(1) +#define SOFTWARE_SWITCH_OFF_ENABLED BIT(0) + +/* Main PMIC PADS Control Register + * PADS_PULL_CR + * Address : 0x11 + */ +#define WAKEUP_DETECTOR_DISABLED BIT(4) +#define PWRCTRL_PD_ACTIVE BIT(3) +#define PWRCTRL_PU_ACTIVE BIT(2) +#define WAKEUP_PD_ACTIVE BIT(1) +#define PONKEY_PU_INACTIVE BIT(0) + +/* Main PMIC VINLOW Control Register + * VBUS_DET_VIN_CRC DMSC + * Address : 0x15 + */ +#define SWIN_DETECTOR_ENABLED BIT(7) +#define SWOUT_DETECTOR_ENABLED BIT(6) +#define VINLOW_ENABLED BIT(0) +#define VINLOW_CTRL_REG_MASK GENMASK(7, 0) + +/* USB Control Register + * Address : 0x40 + */ +#define BOOST_OVP_DISABLED BIT(7) +#define VBUS_OTG_DETECTION_DISABLED BIT(6) +#define SW_OUT_DISCHARGE BIT(5) +#define VBUS_OTG_DISCHARGE BIT(4) +#define OCP_LIMIT_HIGH BIT(3) +#define SWIN_SWOUT_ENABLED BIT(2) +#define USBSW_OTG_SWITCH_ENABLED BIT(1) +#define BOOST_ENABLED BIT(0) + +/* PKEY_TURNOFF_CR + * Address : 0x16 + */ +#define PONKEY_PWR_OFF BIT(7) +#define PONKEY_CC_FLAG_CLEAR BIT(6) +#define PONKEY_TURNOFF_TIMER_MASK GENMASK(3, 0) +#define PONKEY_TURNOFF_MASK GENMASK(7, 0) + +/* + * struct stpmic1 - stpmic1 master device for sub-drivers + * @dev: master device of the chip (can be used to access platform data) + * @irq: main IRQ number + * @regmap_irq_chip_data: irq chip data + */ +struct stpmic1 { + struct device *dev; + struct regmap *regmap; + int irq; + struct regmap_irq_chip_data *irq_data; +}; + +#endif /* __LINUX_MFD_STPMIC1_H */ -- cgit v1.2.3 From 8e1f456129e61371fb190c71ea182a9f6e21282e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 9 Jan 2019 15:44:46 +0100 Subject: leds: Add helper for getting default pattern from Device Tree Multiple LED triggers might need to access default pattern so add a helper for that. Signed-off-by: Krzysztof Kozlowski Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- drivers/leds/led-core.c | 30 ++++++++++++++++++++++++++++++ include/linux/leds.h | 13 +++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index ede4fa0ac2cc..e3da7c03da1b 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); @@ -310,6 +312,34 @@ int led_update_brightness(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_update_brightness); +u32 *led_get_default_pattern(struct led_classdev *led_cdev, unsigned int *size) +{ + struct device_node *np = dev_of_node(led_cdev->dev); + u32 *pattern; + int count; + + if (!np) + return NULL; + + count = of_property_count_u32_elems(np, "led-pattern"); + if (count < 0) + return NULL; + + pattern = kcalloc(count, sizeof(*pattern), GFP_KERNEL); + if (!pattern) + return NULL; + + if (of_property_read_u32_array(np, "led-pattern", pattern, count)) { + kfree(pattern); + return NULL; + } + + *size = count; + + return pattern; +} +EXPORT_SYMBOL_GPL(led_get_default_pattern); + /* Caller must ensure led_cdev->led_access held */ void led_sysfs_disable(struct led_classdev *led_cdev) { diff --git a/include/linux/leds.h b/include/linux/leds.h index 5263f87e1d2c..78204650fe2a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -218,6 +218,19 @@ extern int led_set_brightness_sync(struct led_classdev *led_cdev, */ extern int led_update_brightness(struct led_classdev *led_cdev); +/** + * led_get_default_pattern - return default pattern + * + * @led_cdev: the LED to get default pattern for + * @size: pointer for storing the number of elements in returned array, + * modified only if return != NULL + * + * Return: Allocated array of integers with default pattern from device tree + * or NULL. Caller is responsible for kfree(). + */ +extern u32 *led_get_default_pattern(struct led_classdev *led_cdev, + unsigned int *size); + /** * led_sysfs_disable - disable LED sysfs interface * @led_cdev: the LED to set -- cgit v1.2.3 From fcd44b64b1eb0a33f6cc14f21dcb927ffd664af3 Mon Sep 17 00:00:00 2001 From: Yogesh Narayan Gaur Date: Tue, 15 Jan 2019 10:05:10 +0000 Subject: mtd: spi-nor: add opcodes for octal Read/Write commands - Add opcodes for octal I/O commands * Read : 1-1-8 and 1-8-8 protocol * Write : 1-1-8 and 1-8-8 protocol * opcodes for 4-byte address mode command - Entry of macros in _convert_3to4_xxx function - Add flag SPI_NOR_OCTAL_READ specifying flash support octal read commands. This flag is required for flashes which didn't provides support for auto detection of Octal mode capabilities i.e. not seems to support newer JESD216C standard. Signed-off-by: Vignesh R Signed-off-by: Yogesh Narayan Gaur Reviewed-by: Tudor Ambarus Signed-off-by: Boris Brezillon --- drivers/mtd/spi-nor/spi-nor.c | 16 ++++++++++++++-- include/linux/mtd/spi-nor.h | 16 ++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 6e13bbd1aaa5..872d70722672 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -68,7 +68,7 @@ enum spi_nor_read_command_index { SNOR_CMD_READ_4_4_4, SNOR_CMD_READ_1_4_4_DTR, - /* Octo SPI */ + /* Octal SPI */ SNOR_CMD_READ_1_1_8, SNOR_CMD_READ_1_8_8, SNOR_CMD_READ_8_8_8, @@ -85,7 +85,7 @@ enum spi_nor_pp_command_index { SNOR_CMD_PP_1_4_4, SNOR_CMD_PP_4_4_4, - /* Octo SPI */ + /* Octal SPI */ SNOR_CMD_PP_1_1_8, SNOR_CMD_PP_1_8_8, SNOR_CMD_PP_8_8_8, @@ -278,6 +278,7 @@ struct flash_info { #define NO_CHIP_ERASE BIT(12) /* Chip does not support chip erase */ #define SPI_NOR_SKIP_SFDP BIT(13) /* Skip parsing of SFDP tables */ #define USE_CLSR BIT(14) /* use CLSR command */ +#define SPI_NOR_OCTAL_READ BIT(15) /* Flash supports Octal Read */ /* Part specific fixup hooks. */ const struct spi_nor_fixups *fixups; @@ -398,6 +399,8 @@ static u8 spi_nor_convert_3to4_read(u8 opcode) { SPINOR_OP_READ_1_2_2, SPINOR_OP_READ_1_2_2_4B }, { SPINOR_OP_READ_1_1_4, SPINOR_OP_READ_1_1_4_4B }, { SPINOR_OP_READ_1_4_4, SPINOR_OP_READ_1_4_4_4B }, + { SPINOR_OP_READ_1_1_8, SPINOR_OP_READ_1_1_8_4B }, + { SPINOR_OP_READ_1_8_8, SPINOR_OP_READ_1_8_8_4B }, { SPINOR_OP_READ_1_1_1_DTR, SPINOR_OP_READ_1_1_1_DTR_4B }, { SPINOR_OP_READ_1_2_2_DTR, SPINOR_OP_READ_1_2_2_DTR_4B }, @@ -414,6 +417,8 @@ static u8 spi_nor_convert_3to4_program(u8 opcode) { SPINOR_OP_PP, SPINOR_OP_PP_4B }, { SPINOR_OP_PP_1_1_4, SPINOR_OP_PP_1_1_4_4B }, { SPINOR_OP_PP_1_4_4, SPINOR_OP_PP_1_4_4_4B }, + { SPINOR_OP_PP_1_1_8, SPINOR_OP_PP_1_1_8_4B }, + { SPINOR_OP_PP_1_8_8, SPINOR_OP_PP_1_8_8_4B }, }; return spi_nor_convert_opcode(opcode, spi_nor_3to4_program, @@ -3591,6 +3596,13 @@ static int spi_nor_init_params(struct spi_nor *nor, SNOR_PROTO_1_1_4); } + if (info->flags & SPI_NOR_OCTAL_READ) { + params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8; + spi_nor_set_read_settings(¶ms->reads[SNOR_CMD_READ_1_1_8], + 0, 8, SPINOR_OP_READ_1_1_8, + SNOR_PROTO_1_1_8); + } + /* Page Program settings. */ params->hwcaps.mask |= SNOR_HWCAPS_PP; spi_nor_set_pp_settings(¶ms->page_programs[SNOR_CMD_PP], diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index fa2d89e38e40..2353af8bac99 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -46,9 +46,13 @@ #define SPINOR_OP_READ_1_2_2 0xbb /* Read data bytes (Dual I/O SPI) */ #define SPINOR_OP_READ_1_1_4 0x6b /* Read data bytes (Quad Output SPI) */ #define SPINOR_OP_READ_1_4_4 0xeb /* Read data bytes (Quad I/O SPI) */ +#define SPINOR_OP_READ_1_1_8 0x8b /* Read data bytes (Octal Output SPI) */ +#define SPINOR_OP_READ_1_8_8 0xcb /* Read data bytes (Octal I/O SPI) */ #define SPINOR_OP_PP 0x02 /* Page program (up to 256 bytes) */ #define SPINOR_OP_PP_1_1_4 0x32 /* Quad page program */ #define SPINOR_OP_PP_1_4_4 0x38 /* Quad page program */ +#define SPINOR_OP_PP_1_1_8 0x82 /* Octal page program */ +#define SPINOR_OP_PP_1_8_8 0xc2 /* Octal page program */ #define SPINOR_OP_BE_4K 0x20 /* Erase 4KiB block */ #define SPINOR_OP_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ #define SPINOR_OP_BE_32K 0x52 /* Erase 32KiB block */ @@ -69,9 +73,13 @@ #define SPINOR_OP_READ_1_2_2_4B 0xbc /* Read data bytes (Dual I/O SPI) */ #define SPINOR_OP_READ_1_1_4_4B 0x6c /* Read data bytes (Quad Output SPI) */ #define SPINOR_OP_READ_1_4_4_4B 0xec /* Read data bytes (Quad I/O SPI) */ +#define SPINOR_OP_READ_1_1_8_4B 0x7c /* Read data bytes (Octal Output SPI) */ +#define SPINOR_OP_READ_1_8_8_4B 0xcc /* Read data bytes (Octal I/O SPI) */ #define SPINOR_OP_PP_4B 0x12 /* Page program (up to 256 bytes) */ #define SPINOR_OP_PP_1_1_4_4B 0x34 /* Quad page program */ #define SPINOR_OP_PP_1_4_4_4B 0x3e /* Quad page program */ +#define SPINOR_OP_PP_1_1_8_4B 0x84 /* Octal page program */ +#define SPINOR_OP_PP_1_8_8_4B 0x8e /* Octal page program */ #define SPINOR_OP_BE_4K_4B 0x21 /* Erase 4KiB block */ #define SPINOR_OP_BE_32K_4B 0x5c /* Erase 32KiB block */ #define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ @@ -458,7 +466,7 @@ struct spi_nor_hwcaps { /* *(Fast) Read capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * As a matter of performances, it is relevant to use Octo SPI protocols first, + * As a matter of performances, it is relevant to use Octal SPI protocols first, * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly * (Slow) Read. */ @@ -479,7 +487,7 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_READ_4_4_4 BIT(9) #define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) -#define SNOR_HWCPAS_READ_OCTO GENMASK(14, 11) +#define SNOR_HWCPAS_READ_OCTAL GENMASK(14, 11) #define SNOR_HWCAPS_READ_1_1_8 BIT(11) #define SNOR_HWCAPS_READ_1_8_8 BIT(12) #define SNOR_HWCAPS_READ_8_8_8 BIT(13) @@ -488,7 +496,7 @@ struct spi_nor_hwcaps { /* * Page Program capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the + * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the * legacy SPI 1-1-1 protocol. * Note that Dual Page Programs are not supported because there is no existing * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory @@ -502,7 +510,7 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_PP_1_4_4 BIT(18) #define SNOR_HWCAPS_PP_4_4_4 BIT(19) -#define SNOR_HWCAPS_PP_OCTO GENMASK(22, 20) +#define SNOR_HWCAPS_PP_OCTAL GENMASK(22, 20) #define SNOR_HWCAPS_PP_1_1_8 BIT(20) #define SNOR_HWCAPS_PP_1_8_8 BIT(21) #define SNOR_HWCAPS_PP_8_8_8 BIT(22) -- cgit v1.2.3 From b172fd0c898022c47161a99cb40be5304b0d3fd0 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Wed, 16 Jan 2019 19:55:46 +0100 Subject: spi: ath79: Enable support for compile test To allow building this driver in compile test we need to remove all dependency on headers from arch/mips/include. To allow this we explicitly define all the registers locally instead of using ar71xx_regs.h and we move the platform data struct definition to include/linux/platform_data/spi-ath79.h. Signed-off-by: Alban Bedel Signed-off-by: Mark Brown --- arch/mips/ath79/dev-spi.h | 2 +- arch/mips/include/asm/mach-ath79/ath79_spi_platform.h | 19 ------------------- drivers/spi/Kconfig | 2 +- drivers/spi/spi-ath79.c | 15 ++++++++++++--- include/linux/platform_data/spi-ath79.h | 19 +++++++++++++++++++ 5 files changed, 33 insertions(+), 24 deletions(-) delete mode 100644 arch/mips/include/asm/mach-ath79/ath79_spi_platform.h create mode 100644 include/linux/platform_data/spi-ath79.h (limited to 'include/linux') diff --git a/arch/mips/ath79/dev-spi.h b/arch/mips/ath79/dev-spi.h index d732565ca736..6e15bc8651be 100644 --- a/arch/mips/ath79/dev-spi.h +++ b/arch/mips/ath79/dev-spi.h @@ -13,7 +13,7 @@ #define _ATH79_DEV_SPI_H #include -#include +#include void ath79_register_spi(struct ath79_spi_platform_data *pdata, struct spi_board_info const *info, diff --git a/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h b/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h deleted file mode 100644 index aa71216edf99..000000000000 --- a/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Platform data definition for Atheros AR71XX/AR724X/AR913X SPI controller - * - * Copyright (C) 2008-2010 Gabor Juhos - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ - -#ifndef _ATH79_SPI_PLATFORM_H -#define _ATH79_SPI_PLATFORM_H - -struct ath79_spi_platform_data { - unsigned bus_num; - unsigned num_chipselect; -}; - -#endif /* _ATH79_SPI_PLATFORM_H */ diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 128892c7e21e..71d3d2d5e5d1 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -63,7 +63,7 @@ config SPI_ALTERA config SPI_ATH79 tristate "Atheros AR71XX/AR724X/AR913X SPI controller driver" - depends on ATH79 + depends on ATH79 || COMPILE_TEST select SPI_BITBANG help This enables support for the SPI controller present on the diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c index edf695a359f4..09c4fb7fcf7a 100644 --- a/drivers/spi/spi-ath79.c +++ b/drivers/spi/spi-ath79.c @@ -23,15 +23,24 @@ #include #include #include - -#include -#include +#include #define DRV_NAME "ath79-spi" #define ATH79_SPI_RRW_DELAY_FACTOR 12000 #define MHZ (1000 * 1000) +#define AR71XX_SPI_REG_FS 0x00 /* Function Select */ +#define AR71XX_SPI_REG_CTRL 0x04 /* SPI Control */ +#define AR71XX_SPI_REG_IOC 0x08 /* SPI I/O Control */ +#define AR71XX_SPI_REG_RDS 0x0c /* Read Data Shift */ + +#define AR71XX_SPI_FS_GPIO BIT(0) /* Enable GPIO mode */ + +#define AR71XX_SPI_IOC_DO BIT(0) /* Data Out pin */ +#define AR71XX_SPI_IOC_CLK BIT(8) /* CLK pin */ +#define AR71XX_SPI_IOC_CS(n) BIT(16 + (n)) + struct ath79_spi { struct spi_bitbang bitbang; u32 ioc_base; diff --git a/include/linux/platform_data/spi-ath79.h b/include/linux/platform_data/spi-ath79.h new file mode 100644 index 000000000000..aa71216edf99 --- /dev/null +++ b/include/linux/platform_data/spi-ath79.h @@ -0,0 +1,19 @@ +/* + * Platform data definition for Atheros AR71XX/AR724X/AR913X SPI controller + * + * Copyright (C) 2008-2010 Gabor Juhos + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef _ATH79_SPI_PLATFORM_H +#define _ATH79_SPI_PLATFORM_H + +struct ath79_spi_platform_data { + unsigned bus_num; + unsigned num_chipselect; +}; + +#endif /* _ATH79_SPI_PLATFORM_H */ -- cgit v1.2.3 From 6d7fbce7da0cd06ff3f3f30e009a15a6243f0bc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 12:02:57 -0500 Subject: kill kernfs_pin_sb() unused now and impossible to use safely anyway. Signed-off-by: Al Viro --- fs/kernfs/mount.c | 30 ------------------------------ include/linux/kernfs.h | 1 - 2 files changed, 31 deletions(-) (limited to 'include/linux') diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d71c9405874a..4d303047a4f8 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -377,36 +377,6 @@ void kernfs_kill_sb(struct super_block *sb) kfree(info); } -/** - * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root - * @kernfs_root: the kernfs_root in question - * @ns: the namespace tag - * - * Pin the superblock so the superblock won't be destroyed in subsequent - * operations. This can be used to block ->kill_sb() which may be useful - * for kernfs users which dynamically manage superblocks. - * - * Returns NULL if there's no superblock associated to this kernfs_root, or - * -EINVAL if the superblock is being freed. - */ -struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns) -{ - struct kernfs_super_info *info; - struct super_block *sb = NULL; - - mutex_lock(&kernfs_mutex); - list_for_each_entry(info, &root->supers, node) { - if (info->ns == ns) { - sb = info->sb; - if (!atomic_inc_not_zero(&info->sb->s_active)) - sb = ERR_PTR(-EINVAL); - break; - } - } - mutex_unlock(&kernfs_mutex); - return sb; -} - void __init kernfs_init(void) { diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5b36b1287a5a..44acb4c3659c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -357,7 +357,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, struct kernfs_root *root, unsigned long magic, bool *new_sb_created, const void *ns); void kernfs_kill_sb(struct super_block *sb); -struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); void kernfs_init(void); -- cgit v1.2.3 From ecfc937210e5fdc6554e49b2a735ff22e72ae3f0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Jan 2019 15:06:11 -0800 Subject: net: dsa: Split platform data to header file Instead of having net/dsa.h contain both the internal switch tree/driver structures, split the relevant platform_data parts into include/linux/platform_data/dsa.h and make that header be included by net/dsa.h in order not to break any setup. A subsequent set of patches will update code including net/dsa.h to include only the platform_data header. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/linux/platform_data/dsa.h | 68 +++++++++++++++++++++++++++++++++++++++ include/net/dsa.h | 61 +---------------------------------- 3 files changed, 70 insertions(+), 60 deletions(-) create mode 100644 include/linux/platform_data/dsa.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 4d04cebb4a71..a592b9992b46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10576,6 +10576,7 @@ F: Documentation/devicetree/bindings/net/dsa/ F: net/dsa/ F: include/net/dsa.h F: include/linux/dsa/ +F: include/linux/platform_data/dsa.h F: drivers/net/dsa/ NETWORKING [GENERAL] diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h new file mode 100644 index 000000000000..d4d9bf2060a6 --- /dev/null +++ b/include/linux/platform_data/dsa.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DSA_PDATA_H +#define __DSA_PDATA_H + +struct device; +struct net_device; + +#define DSA_MAX_SWITCHES 4 +#define DSA_MAX_PORTS 12 +#define DSA_RTABLE_NONE -1 + +struct dsa_chip_data { + /* + * How to access the switch configuration registers. + */ + struct device *host_dev; + int sw_addr; + + /* + * Reference to network devices + */ + struct device *netdev[DSA_MAX_PORTS]; + + /* set to size of eeprom if supported by the switch */ + int eeprom_len; + + /* Device tree node pointer for this specific switch chip + * used during switch setup in case additional properties + * and resources needs to be used + */ + struct device_node *of_node; + + /* + * The names of the switch's ports. Use "cpu" to + * designate the switch port that the cpu is connected to, + * "dsa" to indicate that this port is a DSA link to + * another switch, NULL to indicate the port is unused, + * or any other string to indicate this is a physical port. + */ + char *port_names[DSA_MAX_PORTS]; + struct device_node *port_dn[DSA_MAX_PORTS]; + + /* + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. + */ + s8 rtable[DSA_MAX_SWITCHES]; +}; + +struct dsa_platform_data { + /* + * Reference to a Linux network interface that connects + * to the root switch chip of the tree. + */ + struct device *netdev; + struct net_device *of_netdev; + + /* + * Info structs describing each of the switch chips + * connected via this network interface. + */ + int nr_chips; + struct dsa_chip_data *chip; +}; + + +#endif /* __DSA_PDATA_H */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 2f1daf29131a..7f2a668ef2cc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -44,66 +45,6 @@ enum dsa_tag_protocol { DSA_TAG_LAST, /* MUST BE LAST */ }; -#define DSA_MAX_SWITCHES 4 -#define DSA_MAX_PORTS 12 - -#define DSA_RTABLE_NONE -1 - -struct dsa_chip_data { - /* - * How to access the switch configuration registers. - */ - struct device *host_dev; - int sw_addr; - - /* - * Reference to network devices - */ - struct device *netdev[DSA_MAX_PORTS]; - - /* set to size of eeprom if supported by the switch */ - int eeprom_len; - - /* Device tree node pointer for this specific switch chip - * used during switch setup in case additional properties - * and resources needs to be used - */ - struct device_node *of_node; - - /* - * The names of the switch's ports. Use "cpu" to - * designate the switch port that the cpu is connected to, - * "dsa" to indicate that this port is a DSA link to - * another switch, NULL to indicate the port is unused, - * or any other string to indicate this is a physical port. - */ - char *port_names[DSA_MAX_PORTS]; - struct device_node *port_dn[DSA_MAX_PORTS]; - - /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; -}; - -struct dsa_platform_data { - /* - * Reference to a Linux network interface that connects - * to the root switch chip of the tree. - */ - struct device *netdev; - struct net_device *of_netdev; - - /* - * Info structs describing each of the switch chips - * connected via this network interface. - */ - int nr_chips; - struct dsa_chip_data *chip; -}; - struct packet_type; struct dsa_switch; -- cgit v1.2.3 From 8cfb5faf32e85b62f08cfe242ce80b2864d0b8f3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Jan 2019 15:06:13 -0800 Subject: net: dsa: Include platform_data header file b53 and mv88e6xxx support passing platform_data, and now that we have split the platform_data portion from the main net/dsa.h header file, include only the relevant parts. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/b53.h | 2 +- include/linux/platform_data/mv88e6xxx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h index 8eaef2f2b691..c3b61ead41f2 100644 --- a/include/linux/platform_data/b53.h +++ b/include/linux/platform_data/b53.h @@ -20,7 +20,7 @@ #define __B53_H #include -#include +#include struct b53_platform_data { /* Must be first such that dsa_register_switch() can access it */ diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h index f63af2955ea0..963730b44aea 100644 --- a/include/linux/platform_data/mv88e6xxx.h +++ b/include/linux/platform_data/mv88e6xxx.h @@ -2,7 +2,7 @@ #ifndef __DSA_MV88E6XXX_H #define __DSA_MV88E6XXX_H -#include +#include struct dsa_mv88e6xxx_pdata { /* Must be first, such that dsa_register_switch() can access this -- cgit v1.2.3 From 5db5ea995fc2fa89fdef61ef3a658cbb41a24222 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Jan 2019 15:09:35 -0800 Subject: net: phy: Add helpers to determine if PHY driver is generic We are already checking in phy_detach() that the PHY driver is of generic kind (1G or 10G) and we are going to make use of that in the SFP layer as well for 1000BaseT SFP modules, so expose helper functions to return that information. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 34 ++++++++++++++++++++++++++++++++-- include/linux/phy.h | 3 +++ 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bb1410821ce4..c02669270c41 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1277,6 +1277,36 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, } EXPORT_SYMBOL(phy_attach); +static bool phy_driver_is_genphy_kind(struct phy_device *phydev, + struct device_driver *driver) +{ + struct device *d = &phydev->mdio.dev; + bool ret = false; + + if (!phydev->drv) + return ret; + + get_device(d); + ret = d->driver == driver; + put_device(d); + + return ret; +} + +bool phy_driver_is_genphy(struct phy_device *phydev) +{ + return phy_driver_is_genphy_kind(phydev, + &genphy_driver.mdiodrv.driver); +} +EXPORT_SYMBOL_GPL(phy_driver_is_genphy); + +bool phy_driver_is_genphy_10g(struct phy_device *phydev) +{ + return phy_driver_is_genphy_kind(phydev, + &genphy_10g_driver.mdiodrv.driver); +} +EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g); + /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct @@ -1308,8 +1338,8 @@ void phy_detach(struct phy_device *phydev) * from the generic driver so that there's a chance a * real driver could be loaded */ - if (phydev->mdio.dev.driver == &genphy_10g_driver.mdiodrv.driver || - phydev->mdio.dev.driver == &genphy_driver.mdiodrv.driver) + if (phy_driver_is_genphy(phydev) || + phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); /* diff --git a/include/linux/phy.h b/include/linux/phy.h index 3b051f761450..f1c19bf8c658 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1183,4 +1183,7 @@ module_exit(phy_module_exit) #define module_phy_driver(__phy_drivers) \ phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) +bool phy_driver_is_genphy(struct phy_device *phydev); +bool phy_driver_is_genphy_10g(struct phy_device *phydev); + #endif /* __PHY_H */ -- cgit v1.2.3 From 44021606298870e4adc641ef3927e7bb47ca8236 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 15 Jan 2019 12:22:10 -0500 Subject: cpuidle: use BIT() for idle state flags and remove CPUIDLE_DRIVER_FLAGS_MASK Use BIT() macro to do a small tidy-up. CPUIDLE_DRIVER_FLAGS_MASK is not used, so remove it. Signed-off-by: Yangtao Li Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4dff74f48d4b..3b39472324a3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -69,11 +69,9 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_NONE (0x00) -#define CPUIDLE_FLAG_POLLING (0x01) /* polling state */ -#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ -#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ - -#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) +#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ +#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ +#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; -- cgit v1.2.3 From 87b0984ebfabafcfe959e52ca5cdab5eeb2d60c0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 16 Jan 2019 23:06:50 +0000 Subject: net: Add extack argument to ndo_fdb_add() Drivers may not be able to support certain FDB entries, and an error code is insufficient to give clear hints as to the reasons of rejection. In order to make it possible to communicate the rejection reason, extend ndo_fdb_add() with an extack argument. Adapt the existing implementations of ndo_fdb_add() to take the parameter (and ignore it). Pass the extack parameter when invoking ndo_fdb_add() from rtnl_fdb_add(). Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- drivers/net/ethernet/intel/ice/ice_main.c | 3 ++- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- drivers/net/ethernet/mscc/ocelot.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 3 ++- drivers/net/macvlan.c | 3 ++- drivers/net/vxlan.c | 3 ++- include/linux/netdevice.h | 6 ++++-- net/bridge/br_fdb.c | 3 ++- net/bridge/br_private.h | 3 ++- net/core/rtnetlink.c | 5 +++-- net/dsa/dsa_priv.h | 3 ++- net/dsa/slave.c | 3 ++- 14 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f52e2c46e6a7..0ee641c41be4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11644,7 +11644,8 @@ static int i40e_get_phys_port_id(struct net_device *netdev, static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f4bf6bda32a9..48f033928aa2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2438,7 +2438,8 @@ static void ice_set_rx_mode(struct net_device *netdev) */ static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, u16 flags) + u16 vid, u16 flags, + struct netlink_ext_ack *extack) { int err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 87bdf1604ae2..3615e2e52399 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2486,7 +2486,8 @@ static int igb_set_features(struct net_device *netdev, static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index daff8183534b..b53087a980ef 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9910,7 +9910,8 @@ static void ixgbe_del_udp_tunnel_port(struct net_device *dev, static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 215a45374d7b..c6a575eb0ff5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -721,7 +721,8 @@ static void ocelot_get_stats64(struct net_device *dev, static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, u16 flags) + u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct ocelot_port *port = netdev_priv(dev); struct ocelot *ocelot = port->ocelot; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 16d0479f6891..7a873002e626 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -396,7 +396,8 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *netdev, - const unsigned char *addr, u16 vid, u16 flags) + const unsigned char *addr, u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index fc726ce4c164..084a1b3fbc80 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -963,7 +963,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev, static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 83f65eb3085f..11f38fd71678 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1087,7 +1087,8 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, /* Add static entry (via netlink) */ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 flags) + const unsigned char *addr, u16 vid, u16 flags, + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); /* struct net *net = dev_net(vxlan->dev); */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..a57b9a853aab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1152,7 +1152,8 @@ struct dev_ifalias { * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid, u16 flags) + * const unsigned char *addr, u16 vid, u16 flags, + * struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, @@ -1376,7 +1377,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index fe3c758791ca..6664cb8590f8 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -915,7 +915,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags) + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index eabf8bf28a3f..00deef7fc1f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -573,7 +573,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags); + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ea1bed08ede..b302df0cd5ae 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3639,7 +3639,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, const struct net_device_ops *ops = br_dev->netdev_ops; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, extack); if (err) goto out; else @@ -3651,7 +3651,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, + extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 026a05774bf7..1f4972dab9f2 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -103,7 +103,8 @@ static inline void dsa_legacy_unregister(void) { } int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3fcc1d01615..d5680a98a7f0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1009,7 +1009,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); -- cgit v1.2.3 From 8b59bfe83cf15f755024e88812e057af7341f525 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Jan 2019 15:22:20 +0800 Subject: qed: remove duplicated include from qed_if.h Remove duplicated include. Signed-off-by: YueHaibing Acked-by: Denis Bolotin Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 91c536a01b56..5f818fda96bd 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 58fa4a410fc31afe08d0d0c6b6d8860c22ec17c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:20 +0100 Subject: ipc: introduce ksys_ipc()/compat_ksys_ipc() for s390 The sys_ipc() and compat_ksys_ipc() functions are meant to only be used from the system call table, not called by another function. Introduce ksys_*() interfaces for this purpose, as we have done for many other system calls. Link: https://lore.kernel.org/lkml/20190116131527.2071570-3-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: compile fix for !CONFIG_COMPAT] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 2 +- arch/s390/kernel/sys_s390.c | 4 +++- include/linux/syscalls.h | 4 ++++ ipc/syscall.c | 20 ++++++++++++++++---- kernel/sys_ni.c | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 8ac38d51ed7d..a47f6d3c6d5b 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -296,7 +296,7 @@ COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - return compat_sys_ipc(call, first, second, third, ptr, third); + return compat_ksys_ipc(call, first, second, third, ptr, third); } #endif diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 560bdaf8a74f..fd0cbbed4d9f 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -58,6 +58,7 @@ out: return error; } +#ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ @@ -74,8 +75,9 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, * Therefore we can call the generic variant by simply passing the * third parameter also as fifth parameter. */ - return sys_ipc(call, first, second, third, ptr, third); + return ksys_ipc(call, first, second, third, ptr, third); } +#endif /* CONFIG_SYSVIPC */ SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 257cccba3062..fb63045a0fb6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1185,6 +1185,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); ssize_t ksys_readahead(int fd, loff_t offset, size_t count); +int ksys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user * ptr, long fifth); +int compat_ksys_ipc(u32 call, int first, int second, + u32 third, u32 ptr, u32 fifth); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/ipc/syscall.c b/ipc/syscall.c index 1ac06e3983c0..3cf8ad703a4d 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -17,8 +17,8 @@ #include #include -SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, - unsigned long, third, void __user *, ptr, long, fifth) +int ksys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user * ptr, long fifth) { int version, ret; @@ -106,6 +106,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, return -ENOSYS; } } + +SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr, long, fifth) +{ + return ksys_ipc(call, first, second, third, ptr, fifth); +} #endif #ifdef CONFIG_COMPAT @@ -121,8 +127,8 @@ struct compat_ipc_kludge { }; #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC -COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, - u32, third, compat_uptr_t, ptr, u32, fifth) +int compat_ksys_ipc(u32 call, int first, int second, + u32 third, compat_uptr_t ptr, u32 fifth) { int version; u32 pad; @@ -195,5 +201,11 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return -ENOSYS; } + +COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, + u32, third, compat_uptr_t, ptr, u32, fifth) +{ + return compat_ksys_ipc(call, first, second, third, ptr, fifth); +} #endif #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index ab9d0e3c6d50..bc934f31ab10 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -366,6 +366,7 @@ COND_SYSCALL(kexec_file_load); /* s390 */ COND_SYSCALL(s390_pci_mmio_read); COND_SYSCALL(s390_pci_mmio_write); +COND_SYSCALL(s390_ipc); COND_SYSCALL_COMPAT(s390_ipc); /* powerpc */ -- cgit v1.2.3 From 5f620bb6439ea8f354cfe4c7d47887df9d3acaf0 Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Thu, 17 Jan 2019 09:10:55 +0000 Subject: drivers: usb :fsl: Remove USB Errata checking code Remove USB errata checking code from driver. Applicability of erratum is retrieved by reading corresponding property in device tree. This property is written during device tree fixup. Besides, replace spaces with tabs to make code aligned. Signed-off-by: Ramneek Mehresh Signed-off-by: Nikhil Badola Signed-off-by: Yinbo Zhu Signed-off-by: Ran Wang Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 7 +------ drivers/usb/host/fsl-mph-dr-of.c | 6 ++++++ include/linux/fsl_devices.h | 7 ++++--- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 0a867d96c126..e3d0c1c25160 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -304,14 +304,9 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci) return -EINVAL; if (pdata->operating_mode == FSL_USB2_MPH_HOST) { - unsigned int chip, rev, svr; - - svr = mfspr(SPRN_SVR); - chip = svr >> 16; - rev = (svr >> 4) & 0xf; /* Deal with USB Erratum #14 on MPC834x Rev 1.0 & 1.1 chips */ - if ((rev == 1) && (chip >= 0x8050) && (chip <= 0x8055)) + if (pdata->has_fsl_erratum_14 == 1) ehci->has_fsl_port_bug = 1; if (pdata->port_enables & FSL_USB2_PORT0_ENABLED) diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 677f9d592109..4f8b8a08c914 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -225,6 +225,12 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev) pdata->has_fsl_erratum_a005697 = of_property_read_bool(np, "fsl,usb_erratum-a005697"); + if (of_get_property(np, "fsl,usb_erratum_14", NULL)) + pdata->has_fsl_erratum_14 = 1; + else + pdata->has_fsl_erratum_14 = 0; + + /* * Determine whether phy_clk_valid needs to be checked * by reading property in device tree diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 60cef8227534..5da56a674f2f 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -98,10 +98,11 @@ struct fsl_usb2_platform_data { unsigned suspended:1; unsigned already_suspended:1; - unsigned has_fsl_erratum_a007792:1; - unsigned has_fsl_erratum_a005275:1; + unsigned has_fsl_erratum_a007792:1; + unsigned has_fsl_erratum_14:1; + unsigned has_fsl_erratum_a005275:1; unsigned has_fsl_erratum_a005697:1; - unsigned check_phy_clk_valid:1; + unsigned check_phy_clk_valid:1; /* register save area for suspend/resume */ u32 pm_command; -- cgit v1.2.3 From 2ff5c5a1dc6e6c502e0a3e49db4e792804e43693 Mon Sep 17 00:00:00 2001 From: Martin Hostettler Date: Sat, 15 Dec 2018 15:34:20 +0100 Subject: vt: refactor vc_ques to allow of other private sequences. The vc_ques keeps track if a csi sequence is a private DEC control function beginning with '?'. Nowadays some private control functions begin with '>' and '='. Switch the code to instead use a new 3-bit vc_priv that allows for all private use parameter prefixes. Signed-off-by: Martin Hostettler Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 20 +++++++++++--------- include/linux/console_struct.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ec55336abf95..b59feeaaf02b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1341,6 +1341,8 @@ struct vc_data *vc_deallocate(unsigned int currcons) * VT102 emulator */ +enum { EPecma = 0, EPdec, EPeq, EPgt, EPlt}; + #define set_kbd(vc, x) vt_set_kbd_mode_bit((vc)->vc_num, (x)) #define clr_kbd(vc, x) vt_clr_kbd_mode_bit((vc)->vc_num, (x)) #define is_kbd(vc, x) vt_get_kbd_mode_bit((vc)->vc_num, (x)) @@ -1814,7 +1816,7 @@ static void set_mode(struct vc_data *vc, int on_off) int i; for (i = 0; i <= vc->vc_npar; i++) - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { switch(vc->vc_par[i]) { /* DEC private modes set/reset */ case 1: /* Cursor keys send ^[Ox/^[[x */ if (on_off) @@ -2030,7 +2032,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_top = 0; vc->vc_bottom = vc->vc_rows; vc->vc_state = ESnormal; - vc->vc_ques = 0; + vc->vc_priv = EPecma; vc->vc_translate = set_translate(LAT1_MAP, vc); vc->vc_G0_charset = LAT1_MAP; vc->vc_G1_charset = GRAF_MAP; @@ -2234,8 +2236,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_state=ESfunckey; return; } - vc->vc_ques = (c == '?'); - if (vc->vc_ques) + vc->vc_priv = (c == '?') ? EPdec : EPecma; + if (vc->vc_priv != EPecma) return; /* fall through */ case ESgetpars: @@ -2256,7 +2258,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) set_mode(vc, 0); return; case 'c': - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { if (vc->vc_par[0]) vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16); else @@ -2265,7 +2267,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } break; case 'm': - if (vc->vc_ques) { + if (vc->vc_priv == EPdec) { clear_selection(); if (vc->vc_par[0]) vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1]; @@ -2275,7 +2277,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } break; case 'n': - if (!vc->vc_ques) { + if (vc->vc_priv == EPecma) { if (vc->vc_par[0] == 5) status_report(tty); else if (vc->vc_par[0] == 6) @@ -2283,8 +2285,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) } return; } - if (vc->vc_ques) { - vc->vc_ques = 0; + if (vc->vc_priv != EPecma) { + vc->vc_priv = EPecma; return; } switch(c) { diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index ab137f97ecbd..ed798e114663 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -119,7 +119,7 @@ struct vc_data { unsigned int vc_s_blink : 1; unsigned int vc_s_reverse : 1; /* misc */ - unsigned int vc_ques : 1; + unsigned int vc_priv : 3; unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; -- cgit v1.2.3 From 202e651cd43c69a43f75b445e90f55b59f9af0ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:34 +0100 Subject: netfilter: conntrack: gre: convert rwlock to rcu We can use gre. Lock is only needed when a new expectation is added. In case a single spinlock proves to be problematic we can either add one per netns or use an array of locks combined with net_hash_mix() or similar to pick the 'correct' one. But given this is only needed for an expectation rather than per packet a single one should be ok. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 1 + net/netfilter/nf_conntrack_proto_gre.c | 37 ++++++++++-------------- 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 6989e2e4eabf..222c9d3d453f 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -19,6 +19,7 @@ struct nf_conn; struct nf_ct_gre_keymap { struct list_head list; struct nf_conntrack_tuple tuple; + struct rcu_head rcu; }; enum grep_conntrack { diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 8899b51aad44..34dd89485be2 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -49,6 +49,8 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = { }; static unsigned int proto_gre_net_id __read_mostly; +/* used when expectation is added */ +static DEFINE_SPINLOCK(keymap_lock); static inline struct netns_proto_gre *gre_pernet(struct net *net) { @@ -60,12 +62,12 @@ static void nf_ct_gre_keymap_flush(struct net *net) struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del(&km->list); - kfree(km); + list_del_rcu(&km->list); + kfree_rcu(km, rcu); } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, @@ -85,14 +87,12 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) struct nf_ct_gre_keymap *km; __be16 key = 0; - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); @@ -112,14 +112,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) { - read_unlock_bh(&net_gre->keymap_lock); + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *kmp) return 0; - } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; @@ -134,9 +130,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_add_tail(&km->list, &net_gre->keymap_list); - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); return 0; } @@ -145,24 +141,22 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", ct_pptp_info->keymap[dir]); - list_del(&ct_pptp_info->keymap[dir]->list); - kfree(ct_pptp_info->keymap[dir]); + list_del_rcu(&ct_pptp_info->keymap[dir]->list); + kfree_rcu(ct_pptp_info->keymap[dir], rcu); ct_pptp_info->keymap[dir] = NULL; } } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); @@ -365,7 +359,6 @@ static int gre_init_net(struct net *net) struct nf_proto_net *nf = &net_gre->nf; int i; - rwlock_init(&net_gre->keymap_lock); INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) net_gre->gre_timeouts[i] = gre_timeouts[i]; -- cgit v1.2.3 From 22fc4c4c9fd60427bcda00878cee94e7622cfa7a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:35 +0100 Subject: netfilter: conntrack: gre: switch module to be built-in This makes the last of the modular l4 trackers 'bool'. After this, all infrastructure to handle dynamic l4 protocol registration becomes obsolete and can be removed in followup patches. Old: 302824 net/netfilter/nf_conntrack.ko 21504 net/netfilter/nf_conntrack_proto_gre.ko New: 313728 net/netfilter/nf_conntrack.ko Old: text data bss dec hex filename 6281 1732 4 8017 1f51 nf_conntrack_proto_gre.ko 108356 20613 236 129205 1f8b5 nf_conntrack.ko New: 112095 21381 240 133716 20a54 nf_conntrack.ko The size increase is only temporary. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 14 +--- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/nf_conntrack_l4proto.h | 7 ++ include/net/netns/conntrack.h | 17 +++++ net/netfilter/Kconfig | 2 +- net/netfilter/Makefile | 3 +- net/netfilter/nf_conntrack_proto.c | 7 +- net/netfilter/nf_conntrack_proto_gre.c | 93 +++++------------------- net/netfilter/nfnetlink_cttimeout.c | 7 +- 9 files changed, 55 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 222c9d3d453f..59714e9ee4ef 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -22,23 +22,11 @@ struct nf_ct_gre_keymap { struct rcu_head rcu; }; -enum grep_conntrack { - GRE_CT_UNREPLIED, - GRE_CT_REPLIED, - GRE_CT_MAX -}; - -struct netns_proto_gre { - struct nf_proto_net nf; - rwlock_t keymap_lock; - struct list_head keymap_list; - unsigned int gre_timeouts[GRE_CT_MAX]; -}; - /* add new tuple->key_reply pair to keymap */ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); +void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 135ee702c7b0..2c8c2b023848 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -22,5 +22,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #ifdef CONFIG_NF_CT_PROTO_UDPLITE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; +#endif #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 46d554806eb3..fded3f164dcc 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -239,4 +239,11 @@ static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) } #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +static inline struct nf_gre_net *nf_gre_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.gre; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 51cba0b8adf5..c72f413a2d4d 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -70,6 +70,20 @@ struct nf_sctp_net { }; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +enum gre_conntrack { + GRE_CT_UNREPLIED, + GRE_CT_REPLIED, + GRE_CT_MAX +}; + +struct nf_gre_net { + struct nf_proto_net nf; + struct list_head keymap_list; + unsigned int timeouts[GRE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -82,6 +96,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net gre; +#endif }; struct ct_pcpu { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index beb3a69ce1d4..fefd63a243f2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -174,7 +174,7 @@ config NF_CT_PROTO_DCCP If unsure, say Y. config NF_CT_PROTO_GRE - tristate + bool config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ae65a314d7a..e66067befa42 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,6 +13,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -25,8 +26,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o - # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2bbc32d939e4..e113bb2dc88d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -817,6 +817,9 @@ static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { #ifdef CONFIG_NF_CT_PROTO_UDPLITE &nf_conntrack_l4proto_udplite, #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + &nf_conntrack_l4proto_gre, +#endif #if IS_ENABLED(CONFIG_IPV6) &nf_conntrack_l4proto_icmpv6, #endif /* CONFIG_IPV6 */ @@ -897,9 +900,11 @@ void nf_conntrack_proto_pernet_fini(struct net *net) ARRAY_SIZE(builtin_l4proto)); pn->users--; nf_ct_l4proto_unregister_sysctl(pn); +#ifdef CONFIG_NF_CT_PROTO_GRE + nf_ct_gre_keymap_flush(net); +#endif } - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 34dd89485be2..68f9bfb79c4e 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -48,18 +48,17 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_REPLIED] = 180*HZ, }; -static unsigned int proto_gre_net_id __read_mostly; /* used when expectation is added */ static DEFINE_SPINLOCK(keymap_lock); -static inline struct netns_proto_gre *gre_pernet(struct net *net) +static inline struct nf_gre_net *gre_pernet(struct net *net) { - return net_generic(net, proto_gre_net_id); + return &net->ct.nf_ct_proto.gre; } -static void nf_ct_gre_keymap_flush(struct net *net) +void nf_ct_gre_keymap_flush(struct net *net) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; spin_lock_bh(&keymap_lock); @@ -83,7 +82,7 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, /* look up the source key for a given tuple */ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km; __be16 key = 0; @@ -105,7 +104,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; @@ -210,7 +209,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) static unsigned int *gre_get_timeouts(struct net *net) { - return gre_pernet(net)->gre_timeouts; + return gre_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntrack */ @@ -272,13 +271,13 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); if (!timeouts) timeouts = gre_get_timeouts(net); /* set default timeouts for GRE. */ - timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; - timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; + timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = @@ -332,10 +331,11 @@ static struct ctl_table gre_sysctl_table[] = { }; #endif -static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, - struct netns_proto_gre *net_gre) +static int gre_kmemdup_sysctl_table(struct net *net) { #ifdef CONFIG_SYSCTL + struct nf_gre_net *net_gre = gre_pernet(net); + struct nf_proto_net *nf = &net_gre->nf; int i; if (nf->ctl_table) @@ -348,26 +348,25 @@ static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, return -ENOMEM; for (i = 0; i < GRE_CT_MAX; i++) - nf->ctl_table[i].data = &net_gre->gre_timeouts[i]; + nf->ctl_table[i].data = &net_gre->timeouts[i]; #endif return 0; } static int gre_init_net(struct net *net) { - struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_proto_net *nf = &net_gre->nf; + struct nf_gre_net *net_gre = gre_pernet(net); int i; INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) - net_gre->gre_timeouts[i] = gre_timeouts[i]; + net_gre->timeouts[i] = gre_timeouts[i]; - return gre_kmemdup_sysctl_table(net, nf, net_gre); + return gre_kmemdup_sysctl_table(net); } /* protocol helper struct */ -static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, #ifdef CONFIG_NF_CONNTRACK_PROCFS @@ -391,61 +390,5 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .nla_policy = gre_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .net_id = &proto_gre_net_id, .init_net = gre_init_net, }; - -static int proto_gre_net_init(struct net *net) -{ - int ret = 0; - - ret = nf_ct_l4proto_pernet_register_one(net, - &nf_conntrack_l4proto_gre4); - if (ret < 0) - pr_err("nf_conntrack_gre4: pernet registration failed.\n"); - return ret; -} - -static void proto_gre_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations proto_gre_net_ops = { - .init = proto_gre_net_init, - .exit = proto_gre_net_exit, - .id = &proto_gre_net_id, - .size = sizeof(struct netns_proto_gre), -}; - -static int __init nf_ct_proto_gre_init(void) -{ - int ret; - - BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0); - - ret = register_pernet_subsys(&proto_gre_net_ops); - if (ret < 0) - goto out_pernet; - ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - - return 0; -out_gre4: - unregister_pernet_subsys(&proto_gre_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_ct_proto_gre_fini(void) -{ - nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); - unregister_pernet_subsys(&proto_gre_net_ops); -} - -module_init(nf_ct_proto_gre_init); -module_exit(nf_ct_proto_gre_fini); - -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 109b0d27345a..0e3e1a018206 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -474,12 +474,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, break; case IPPROTO_GRE: #ifdef CONFIG_NF_CT_PROTO_GRE - if (l4proto->net_id) { - struct netns_proto_gre *net_gre; - - net_gre = net_generic(net, *l4proto->net_id); - timeouts = net_gre->gre_timeouts; - } + timeouts = nf_gre_pernet(net)->timeouts; #endif break; case 255: -- cgit v1.2.3 From df5e1629087a45ca915fa0f69ea662175261855e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:37 +0100 Subject: netfilter: conntrack: remove pkt_to_tuple callback GRE is now builtin, so we can handle it via direct call and remove the callback. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 2 ++ include/net/netfilter/nf_conntrack_l4proto.h | 5 ----- net/netfilter/nf_conntrack_core.c | 6 ++++-- net/netfilter/nf_conntrack_proto_generic.c | 11 ----------- net/netfilter/nf_conntrack_proto_gre.c | 5 ++--- 5 files changed, 8 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 59714e9ee4ef..25f9a770fb84 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -30,5 +30,7 @@ void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple); #endif /* __KERNEL__ */ #endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3585f8666fc0..0d4b0398aeb9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,11 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Try to fill in the third arg: dataoff is offset past network protocol - hdr. Return true if possible. */ - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple); - /* Invert the per-proto part of the tuple: ie. turn xmit into reply. * Only used by icmp, most protocols use a generic version. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b3840d36c3a6..b71e271f2b44 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -279,9 +279,11 @@ nf_ct_get_tuple(const struct sk_buff *skb, return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); case IPPROTO_ICMP: return icmp_pkt_to_tuple(skb, dataoff, net, tuple); +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return gre_pkt_to_tuple(skb, dataoff, net, tuple); +#endif } - if (unlikely(l4proto->pkt_to_tuple)) - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); /* Actually only need first 4 bytes to get ports. */ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 5da19d5fbc76..5a5bf7cb6508 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -27,16 +27,6 @@ static bool nf_generic_should_process(u8 proto) } } -static bool generic_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) -{ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - - return true; -} - /* Returns verdict for packet, or -1 for invalid. */ static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -149,7 +139,6 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l4proto = 255, - .pkt_to_tuple = generic_pkt_to_tuple, .packet = generic_packet, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 68f9bfb79c4e..04bc982b274d 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -162,8 +162,8 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ /* gre hdr info to tuple */ -static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct pptp_gre_header *pgrehdr; struct pptp_gre_header _pgrehdr; @@ -368,7 +368,6 @@ static int gre_init_net(struct net *net) /* protocol helper struct */ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, - .pkt_to_tuple = gre_pkt_to_tuple, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif -- cgit v1.2.3 From 570d0200123fb4f809aa2f6226e93a458d664d70 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 18 Jan 2019 10:34:59 +0800 Subject: driver core: move device->knode_class to device_private As the description of struct device_private says, it stores data which is private to driver core. And it already has similar fields like: knode_parent, knode_driver, knode_driver and knode_bus. This look it is more proper to put knode_class together with those fields to make it private to driver core. This patch move device->knode_class to device_private to make it comply with code convention. Signed-off-by: Wei Yang Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 4 ++++ drivers/base/class.c | 14 ++++++++++---- drivers/base/core.c | 4 ++-- include/linux/device.h | 1 - 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index 7a419a7a6235..37329a668935 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -60,6 +60,7 @@ struct driver_private { * @knode_parent - node in sibling list * @knode_driver - node in driver list * @knode_bus - node in bus list + * @knode_class - node in class list * @deferred_probe - entry in deferred_probe_list which is used to retry the * binding of drivers which were unable to get all the resources needed by * the device; typically because it depends on another driver getting @@ -74,6 +75,7 @@ struct device_private { struct klist_node knode_parent; struct klist_node knode_driver; struct klist_node knode_bus; + struct klist_node knode_class; struct list_head deferred_probe; struct device *device; }; @@ -83,6 +85,8 @@ struct device_private { container_of(obj, struct device_private, knode_driver) #define to_device_private_bus(obj) \ container_of(obj, struct device_private, knode_bus) +#define to_device_private_class(obj) \ + container_of(obj, struct device_private, knode_class) /* initialisation functions */ extern int devices_init(void); diff --git a/drivers/base/class.c b/drivers/base/class.c index 54def4e02f00..d8a6a5864c2e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -117,16 +117,22 @@ static void class_put(struct class *cls) kset_put(&cls->p->subsys); } +static struct device *klist_class_to_dev(struct klist_node *n) +{ + struct device_private *p = to_device_private_class(n); + return p->device; +} + static void klist_class_dev_get(struct klist_node *n) { - struct device *dev = container_of(n, struct device, knode_class); + struct device *dev = klist_class_to_dev(n); get_device(dev); } static void klist_class_dev_put(struct klist_node *n) { - struct device *dev = container_of(n, struct device, knode_class); + struct device *dev = klist_class_to_dev(n); put_device(dev); } @@ -277,7 +283,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, struct klist_node *start_knode = NULL; if (start) - start_knode = &start->knode_class; + start_knode = &start->p->knode_class; klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); iter->type = type; } @@ -304,7 +310,7 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter) knode = klist_next(&iter->ki); if (!knode) return NULL; - dev = container_of(knode, struct device, knode_class); + dev = klist_class_to_dev(knode); if (!iter->type || iter->type == dev->type) return dev; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 0073b09bb99f..4a4b6f8cbc4f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1966,7 +1966,7 @@ int device_add(struct device *dev) if (dev->class) { mutex_lock(&dev->class->p->mutex); /* tie the class to the device */ - klist_add_tail(&dev->knode_class, + klist_add_tail(&dev->p->knode_class, &dev->class->p->klist_devices); /* notify any interfaces that the device is here */ @@ -2105,7 +2105,7 @@ void device_del(struct device *dev) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ - klist_del(&dev->knode_class); + klist_del(&dev->p->knode_class); mutex_unlock(&dev->class->p->mutex); } device_remove_file(dev, &dev_attr_uevent); diff --git a/include/linux/device.h b/include/linux/device.h index 6cb4640b6160..d0e452fd0bff 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1035,7 +1035,6 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct klist_node knode_class; struct class *class; const struct attribute_group **groups; /* optional groups */ -- cgit v1.2.3 From 1cfb2a512e74e577bb0ed7c8d76df90a41a83f6a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 Jan 2019 19:15:59 +0900 Subject: LSM: Make lsm_early_cred() and lsm_early_task() local functions. Since current->cred == current->real_cred when ordered_lsm_init() is called, and lsm_early_cred()/lsm_early_task() need to be called between the amount of required bytes is determined and module specific initialization function is called, we can move these calls from individual modules to ordered_lsm_init(). Signed-off-by: Tetsuo Handa Acked-by: Casey Schaufler Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 5 ----- security/apparmor/lsm.c | 2 -- security/security.c | 27 +++++++++++---------------- security/selinux/hooks.c | 1 - security/smack/smack_lsm.c | 2 -- security/tomoyo/tomoyo.c | 1 - 6 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 195707210975..22fc786d723a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -2112,9 +2112,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, extern int lsm_inode_alloc(struct inode *inode); -#ifdef CONFIG_SECURITY -void __init lsm_early_cred(struct cred *cred); -void __init lsm_early_task(struct task_struct *task); -#endif - #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b6c395e2acd0..bb5a02d2439f 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1484,8 +1484,6 @@ static int __init set_init_ctx(void) { struct cred *cred = (struct cred *)current->real_cred; - lsm_early_cred(cred); - lsm_early_task(current); set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); return 0; diff --git a/security/security.c b/security/security.c index a618e22df5c6..992b612c819a 100644 --- a/security/security.c +++ b/security/security.c @@ -278,6 +278,9 @@ static void __init ordered_lsm_parse(const char *order, const char *origin) kfree(sep); } +static void __init lsm_early_cred(struct cred *cred); +static void __init lsm_early_task(struct task_struct *task); + static void __init ordered_lsm_init(void) { struct lsm_info **lsm; @@ -312,6 +315,8 @@ static void __init ordered_lsm_init(void) blob_sizes.lbs_inode, 0, SLAB_PANIC, NULL); + lsm_early_cred((struct cred *) current->cred); + lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); @@ -465,17 +470,12 @@ static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) * lsm_early_cred - during initialization allocate a composite cred blob * @cred: the cred that needs a blob * - * Allocate the cred blob for all the modules if it's not already there + * Allocate the cred blob for all the modules */ -void __init lsm_early_cred(struct cred *cred) +static void __init lsm_early_cred(struct cred *cred) { - int rc; + int rc = lsm_cred_alloc(cred, GFP_KERNEL); - if (cred == NULL) - panic("%s: NULL cred.\n", __func__); - if (cred->security != NULL) - return; - rc = lsm_cred_alloc(cred, GFP_KERNEL); if (rc) panic("%s: Early cred alloc failed.\n", __func__); } @@ -589,17 +589,12 @@ int lsm_msg_msg_alloc(struct msg_msg *mp) * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob * - * Allocate the task blob for all the modules if it's not already there + * Allocate the task blob for all the modules */ -void __init lsm_early_task(struct task_struct *task) +static void __init lsm_early_task(struct task_struct *task) { - int rc; + int rc = lsm_task_alloc(task); - if (task == NULL) - panic("%s: task cred.\n", __func__); - if (task->security != NULL) - return; - rc = lsm_task_alloc(task); if (rc) panic("%s: Early task alloc failed.\n", __func__); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b2ee49f938f1..5d92167dbe05 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -207,7 +207,6 @@ static void cred_init_security(void) struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; - lsm_early_cred(cred); tsec = selinux_cred(cred); tsec->osid = tsec->sid = SECINITSID_KERNEL; } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0b848b1f6366..79d6d2a6a0bc 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4671,8 +4671,6 @@ static __init int smack_init(void) if (!smack_inode_cache) return -ENOMEM; - lsm_early_cred(cred); - /* * Set the security state for the initial task. */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 066c0daf0efc..2b3eee06004b 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -566,7 +566,6 @@ static int __init tomoyo_init(void) /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo"); printk(KERN_INFO "TOMOYO Linux initialized\n"); - lsm_early_cred(cred); blob = tomoyo_cred(cred); *blob = &tomoyo_kernel_domain; tomoyo_mm_init(); -- cgit v1.2.3 From 7527a7b157d1191b23562ed70154ae93bd65f845 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 17 Jan 2019 20:14:15 +0200 Subject: IB/core: Simplify rdma cgroup registration RDMA cgroup registration routine always returns success, so simplify function to be void and run clang formatter over whole CONFIG_CGROUP_RDMA art of core_priv.h. This reduces unwinding error path for regular registration and future net namespace change functionality for rdma device. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Tejun Heo Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cgroup.c | 5 ++--- drivers/infiniband/core/core_priv.h | 17 +++++++++++------ drivers/infiniband/core/device.c | 8 +------- include/linux/cgroup_rdma.h | 2 +- kernel/cgroup/rdma.c | 5 +---- 5 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c index 126ac5f99db7..388fd04e5f63 100644 --- a/drivers/infiniband/core/cgroup.c +++ b/drivers/infiniband/core/cgroup.c @@ -21,12 +21,11 @@ * Register with the rdma cgroup. Should be called before * exposing rdma device to user space applications to avoid * resource accounting leak. - * Returns 0 on success or otherwise failure code. */ -int ib_device_register_rdmacg(struct ib_device *device) +void ib_device_register_rdmacg(struct ib_device *device) { device->cg_device.name = device->name; - return rdmacg_register_device(&device->cg_device); + rdmacg_register_device(&device->cg_device); } /** diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index aca75c74e451..42a49982f66e 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -115,7 +115,7 @@ void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); #ifdef CONFIG_CGROUP_RDMA -int ib_device_register_rdmacg(struct ib_device *device); +void ib_device_register_rdmacg(struct ib_device *device); void ib_device_unregister_rdmacg(struct ib_device *device); int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, @@ -126,21 +126,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index); #else -static inline int ib_device_register_rdmacg(struct ib_device *device) -{ return 0; } +static inline void ib_device_register_rdmacg(struct ib_device *device) +{ +} static inline void ib_device_unregister_rdmacg(struct ib_device *device) -{ } +{ +} static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ return 0; } +{ + return 0; +} static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ } +{ +} #endif static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 4a9aa6d10c5e..200431c540f2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -599,12 +599,7 @@ int ib_register_device(struct ib_device *device, const char *name) device->index = __dev_new_index(); - ret = ib_device_register_rdmacg(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't register device with rdma cgroup\n"); - goto dev_cleanup; - } + ib_device_register_rdmacg(device); ret = ib_device_register_sysfs(device); if (ret) { @@ -627,7 +622,6 @@ int ib_register_device(struct ib_device *device, const char *name) cg_cleanup: ib_device_unregister_rdmacg(device); -dev_cleanup: cleanup_device(device); out: mutex_unlock(&device_mutex); diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index e94290b29e99..ef1bae2983f3 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -39,7 +39,7 @@ struct rdmacg_device { * APIs for RDMA/IB stack to publish when a device wants to * participate in resource accounting */ -int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_register_device(struct rdmacg_device *device); void rdmacg_unregister_device(struct rdmacg_device *device); /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index d3bbb757ee49..1d75ae7f1cb7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge); * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. - * Returns 0 on success or EINVAL when table length given is beyond - * supported size. */ -int rdmacg_register_device(struct rdmacg_device *device) +void rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); @@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device) mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); - return 0; } EXPORT_SYMBOL(rdmacg_register_device); -- cgit v1.2.3 From e302c2a5fe0ca63b8fcc93389917625f486e0670 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 16 Jan 2019 19:47:57 +0100 Subject: net: phy: remove state PHY_CHANGELINK Since recent changes to the phylib state machine state PHY_CHANGELINK isn't used any longer. Therefore let's remove it. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 -- include/linux/phy.h | 6 ------ 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 241fb83ef4de..b0632e859564 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -47,7 +47,6 @@ static const char *phy_state_to_str(enum phy_state st) PHY_STATE_STR(RUNNING) PHY_STATE_STR(NOLINK) PHY_STATE_STR(FORCING) - PHY_STATE_STR(CHANGELINK) PHY_STATE_STR(HALTED) PHY_STATE_STR(RESUMING) } @@ -939,7 +938,6 @@ void phy_state_machine(struct work_struct *work) break; case PHY_NOLINK: case PHY_RUNNING: - case PHY_CHANGELINK: case PHY_RESUMING: err = phy_check_link_status(phydev); break; diff --git a/include/linux/phy.h b/include/linux/phy.h index f1c19bf8c658..232d93b9cea4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -304,11 +304,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * - irq or timer will set NOLINK if link goes down * - phy_stop moves to HALTED * - * CHANGELINK: PHY experienced a change in link state - * - timer moves to RUNNING if link - * - timer moves to NOLINK if the link is down - * - phy_stop moves to HALTED - * * HALTED: PHY is up, but no polling or interrupts are done. Or * PHY is in an error state. * @@ -327,7 +322,6 @@ enum phy_state { PHY_RUNNING, PHY_NOLINK, PHY_FORCING, - PHY_CHANGELINK, PHY_RESUMING }; -- cgit v1.2.3 From bb658ab7b8f2828b35c207a95cb0c05965721022 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 17 Jan 2019 20:09:21 +0100 Subject: net: phy: remove phy_stop_interrupts Interrupts have been disabled in phy_stop() already. So we can remove phy_stop_interrupts() and free the interrupt in phy_disconnect() directly. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 17 ----------------- drivers/net/phy/phy_device.c | 4 ++-- include/linux/phy.h | 1 - 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 37cf39fdcc91..f7a92e7edff7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -818,23 +818,6 @@ int phy_start_interrupts(struct phy_device *phydev) } EXPORT_SYMBOL(phy_start_interrupts); -/** - * phy_stop_interrupts - disable interrupts from a PHY device - * @phydev: target phy_device struct - */ -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e269a355012d..7b3164174251 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1002,8 +1002,8 @@ void phy_disconnect(struct phy_device *phydev) if (phy_is_started(phydev)) phy_stop(phydev); - if (phydev->irq > 0) - phy_stop_interrupts(phydev); + if (phy_interrupt_is_valid(phydev)) + free_irq(phydev->irq, phydev); phydev->adjust_link = NULL; diff --git a/include/linux/phy.h b/include/linux/phy.h index 232d93b9cea4..0990f913d649 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -951,7 +951,6 @@ int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); -int phy_stop_interrupts(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); -- cgit v1.2.3 From 59c28058fa7bb1cc7ab8b2c5607093cbbefafeb4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Jan 2019 10:46:13 -0800 Subject: net: netlink: add helper to retrieve NETLINK_F_STRICT_CHK Dumps can read state of the NETLINK_F_STRICT_CHK flag from a field in the callback structure. For non-dump GET requests we need a way to access the state of that flag from a socket. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + net/netlink/af_netlink.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4e8add270200..593d1b9c33a8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -126,6 +126,7 @@ void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); +bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c023d6120f6..8fa35df94c07 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) } EXPORT_SYMBOL_GPL(netlink_has_listeners); +bool netlink_strict_get_check(struct sk_buff *skb) +{ + const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk); + + return nlk->flags & NETLINK_F_STRICT_CHK; +} +EXPORT_SYMBOL_GPL(netlink_strict_get_check); + static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); -- cgit v1.2.3 From f5d782d46aa5d4dd369e6560ce5227136b58926f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 13 Dec 2018 02:38:58 +0100 Subject: power: supply: isp1704: switch to gpiod API This migrates isp1704 driver from old GPIO API to new descriptor based GPIO API and drops useless platform data as a side-effect. Migration is simple, since all mainline users are DT based and DT API does not change. Out of tree users of the platform data need to migrate to gpiod_lookup_table as described here: Documentation/driver-api/gpio/board.rst Reviewed-by: Linus Walleij Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- drivers/power/supply/isp1704_charger.c | 60 +++++++--------------------------- include/linux/power/isp1704_charger.h | 30 ----------------- 2 files changed, 12 insertions(+), 78 deletions(-) delete mode 100644 include/linux/power/isp1704_charger.h (limited to 'include/linux') diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 95af5f305838..a63cb5dcfa08 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -30,13 +30,12 @@ #include #include #include -#include +#include #include #include #include #include -#include /* Vendor specific Power Control register */ #define ISP1704_PWR_CTRL 0x3d @@ -60,6 +59,7 @@ struct isp1704_charger { struct device *dev; struct power_supply *psy; struct power_supply_desc psy_desc; + struct gpio_desc *enable_gpio; struct usb_phy *phy; struct notifier_block nb; struct work_struct work; @@ -81,18 +81,9 @@ static inline int isp1704_write(struct isp1704_charger *isp, u32 reg, u32 val) return usb_phy_io_write(isp->phy, val, reg); } -/* - * Disable/enable the power from the isp1704 if a function for it - * has been provided with platform data. - */ static void isp1704_charger_set_power(struct isp1704_charger *isp, bool on) { - struct isp1704_charger_data *board = isp->dev->platform_data; - - if (board && board->set_power) - board->set_power(on); - else if (board) - gpio_set_value(board->enable_gpio, on); + gpiod_set_value(isp->enable_gpio, on); } /* @@ -405,46 +396,19 @@ static int isp1704_charger_probe(struct platform_device *pdev) int ret = -ENODEV; struct power_supply_config psy_cfg = {}; - struct isp1704_charger_data *pdata = dev_get_platdata(&pdev->dev); - struct device_node *np = pdev->dev.of_node; - - if (np) { - int gpio = of_get_named_gpio(np, "nxp,enable-gpio", 0); - - if (gpio < 0) { - dev_err(&pdev->dev, "missing DT GPIO nxp,enable-gpio\n"); - return gpio; - } - - pdata = devm_kzalloc(&pdev->dev, - sizeof(struct isp1704_charger_data), GFP_KERNEL); - if (!pdata) { - ret = -ENOMEM; - goto fail0; - } - pdata->enable_gpio = gpio; - - dev_info(&pdev->dev, "init gpio %d\n", pdata->enable_gpio); - - ret = devm_gpio_request_one(&pdev->dev, pdata->enable_gpio, - GPIOF_OUT_INIT_HIGH, "isp1704_reset"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed\n"); - goto fail0; - } - } - - if (!pdata) { - dev_err(&pdev->dev, "missing platform data!\n"); - return -ENODEV; - } - - isp = devm_kzalloc(&pdev->dev, sizeof(*isp), GFP_KERNEL); if (!isp) return -ENOMEM; - if (np) + isp->enable_gpio = devm_gpiod_get(&pdev->dev, "nxp,enable", + GPIOD_OUT_HIGH); + if (IS_ERR(isp->enable_gpio)) { + ret = PTR_ERR(isp->enable_gpio); + dev_err(&pdev->dev, "Could not get reset gpio: %d\n", ret); + return ret; + } + + if (pdev->dev.of_node) isp->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0); else isp->phy = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2); diff --git a/include/linux/power/isp1704_charger.h b/include/linux/power/isp1704_charger.h deleted file mode 100644 index 0105d9e7af85..000000000000 --- a/include/linux/power/isp1704_charger.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * ISP1704 USB Charger Detection driver - * - * Copyright (C) 2011 Nokia Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#ifndef __ISP1704_CHARGER_H -#define __ISP1704_CHARGER_H - -struct isp1704_charger_data { - void (*set_power)(bool on); - int enable_gpio; -}; - -#endif -- cgit v1.2.3 From 486efe9f8e30bac1e236f867df164f4966f3e207 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 10 Jan 2019 13:53:24 +0000 Subject: perf/core: Add function to test for event exclusion flags Add a function that tests if any of the perf event exclusion flags are set on a given event. Signed-off-by: Andrew Murray Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Mark Rutland Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Richard Henderson Cc: Russell King Cc: Sascha Hauer Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: robin.murphy@arm.com Cc: suzuki.poulose@arm.com Link: https://lkml.kernel.org/r/1547128414-50693-3-git-send-email-andrew.murray@arm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..54a78d22f0a6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1004,6 +1004,15 @@ perf_event__output_id_sample(struct perf_event *event, extern void perf_log_lost_samples(struct perf_event *event, u64 lost); +static inline bool event_has_any_exclude_flag(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv || + attr->exclude_guest || attr->exclude_host; +} + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; -- cgit v1.2.3 From cc6795aeffea0a80d0baf9ad31ba926a6c42cef5 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 10 Jan 2019 13:53:25 +0000 Subject: perf/core: Add PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs Many PMU drivers do not have the capability to exclude counting events that occur in specific contexts such as idle, kernel, guest, etc. These drivers indicate this by returning an error in their event_init upon testing the events attribute flags. This approach is error prone and often inconsistent. Let's instead allow PMU drivers to advertise their inability to exclude based on context via a new capability: PERF_PMU_CAP_NO_EXCLUDE. This allows the perf core to reject requests for exclusion events where there is no support in the PMU. Signed-off-by: Andrew Murray Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Mark Rutland Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Richard Henderson Cc: Russell King Cc: Sascha Hauer Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: robin.murphy@arm.com Cc: suzuki.poulose@arm.com Link: https://lkml.kernel.org/r/1547128414-50693-4-git-send-email-andrew.murray@arm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 54a78d22f0a6..cec02dc63b51 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -244,6 +244,7 @@ struct perf_event; #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 +#define PERF_PMU_CAP_NO_EXCLUDE 0x80 /** * struct pmu - generic performance monitoring unit diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..fbe59b793b36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9772,6 +9772,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) if (ctx) perf_event_ctx_unlock(event->group_leader, ctx); + if (!ret) { + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && + event_has_any_exclude_flag(event)) { + if (event->destroy) + event->destroy(event); + ret = -EINVAL; + } + } + if (ret) module_put(pmu->module); -- cgit v1.2.3 From 8321be6a9df5c5cfbf3fb5f716caf8698a5a7016 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Jan 2019 14:17:37 +0530 Subject: cpufreq: Replace open-coded << with BIT() Minor clean-up to use BIT() and keep checkpatch happy. Clean up the comment formatting while we're at it to make it easier to read. Signed-off-by: Amit Kucheria Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c86d6d8bdfed..bd7fbd6a4478 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -346,14 +346,15 @@ struct cpufreq_driver { }; /* flags */ -#define CPUFREQ_STICKY (1 << 0) /* driver isn't removed even if - all ->init() calls failed */ -#define CPUFREQ_CONST_LOOPS (1 << 1) /* loops_per_jiffy or other - kernel "constants" aren't - affected by frequency - transitions */ -#define CPUFREQ_PM_NO_WARN (1 << 2) /* don't warn on suspend/resume - speed mismatches */ + +/* driver isn't removed even if all ->init() calls failed */ +#define CPUFREQ_STICKY BIT(0) + +/* loops_per_jiffy or other kernel "constants" aren't affected by frequency transitions */ +#define CPUFREQ_CONST_LOOPS BIT(1) + +/* don't warn on suspend/resume speed mismatches */ +#define CPUFREQ_PM_NO_WARN BIT(2) /* * This should be set by platforms having multiple clock-domains, i.e. @@ -361,14 +362,14 @@ struct cpufreq_driver { * be created in cpu/cpu/cpufreq/ directory and so they can use the same * governor with different tunables for different clusters. */ -#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY (1 << 3) +#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY BIT(3) /* * Driver will do POSTCHANGE notifications from outside of their ->target() * routine and so must set cpufreq_driver->flags with this flag, so that core * can handle them specially. */ -#define CPUFREQ_ASYNC_NOTIFICATION (1 << 4) +#define CPUFREQ_ASYNC_NOTIFICATION BIT(4) /* * Set by drivers which want cpufreq core to check if CPU is running at a @@ -377,13 +378,13 @@ struct cpufreq_driver { * from the table. And if that fails, we will stop further boot process by * issuing a BUG_ON(). */ -#define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +#define CPUFREQ_NEED_INITIAL_FREQ_CHECK BIT(5) /* * Set by drivers to disallow use of governors with "dynamic_switching" flag * set. */ -#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING (1 << 6) +#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6) int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3 From bbe7449e2599b58cf7b995461e2189998111f907 Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Mon, 21 Jan 2019 00:54:27 +0000 Subject: fs: common implementation of file type Many file systems use a copy&paste implementation of dirent to on-disk file type conversions. Create a common implementation to be used by file systems with some useful conversion helpers to reduce open coded file type conversions in file system code. Signed-off-by: Amir Goldstein Signed-off-by: Phillip Potter Signed-off-by: Jan Kara --- MAINTAINERS | 1 + fs/Makefile | 3 +- fs/fs_types.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 17 +------- include/linux/fs_types.h | 75 +++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 17 deletions(-) create mode 100644 fs/fs_types.c create mode 100644 include/linux/fs_types.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 51029a425dbe..0afaaf0aa6be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5881,6 +5881,7 @@ L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/* F: include/linux/fs.h +F: include/linux/fs_types.h F: include/uapi/linux/fs.h FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER diff --git a/fs/Makefile b/fs/Makefile index 293733f61594..23fcd8c164a3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ - stack.o fs_struct.o statfs.o fs_pin.o nsfs.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ + fs_types.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_types.c b/fs/fs_types.c new file mode 100644 index 000000000000..78365e5dc08c --- /dev/null +++ b/fs/fs_types.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* + * fs on-disk file type to dirent file type conversion + */ +static const unsigned char fs_dtype_by_ftype[FT_MAX] = { + [FT_UNKNOWN] = DT_UNKNOWN, + [FT_REG_FILE] = DT_REG, + [FT_DIR] = DT_DIR, + [FT_CHRDEV] = DT_CHR, + [FT_BLKDEV] = DT_BLK, + [FT_FIFO] = DT_FIFO, + [FT_SOCK] = DT_SOCK, + [FT_SYMLINK] = DT_LNK +}; + +/** + * fs_ftype_to_dtype() - fs on-disk file type to dirent type. + * @filetype: The on-disk file type to convert. + * + * This function converts the on-disk file type value (FT_*) to the directory + * entry type (DT_*). + * + * Context: Any context. + * Return: + * * DT_UNKNOWN - Unknown type + * * DT_FIFO - FIFO + * * DT_CHR - Character device + * * DT_DIR - Directory + * * DT_BLK - Block device + * * DT_REG - Regular file + * * DT_LNK - Symbolic link + * * DT_SOCK - Local-domain socket + */ +unsigned char fs_ftype_to_dtype(unsigned int filetype) +{ + if (filetype >= FT_MAX) + return DT_UNKNOWN; + + return fs_dtype_by_ftype[filetype]; +} +EXPORT_SYMBOL_GPL(fs_ftype_to_dtype); + +/* + * dirent file type to fs on-disk file type conversion + * Values not initialized explicitly are FT_UNKNOWN (0). + */ +static const unsigned char fs_ftype_by_dtype[DT_MAX] = { + [DT_REG] = FT_REG_FILE, + [DT_DIR] = FT_DIR, + [DT_LNK] = FT_SYMLINK, + [DT_CHR] = FT_CHRDEV, + [DT_BLK] = FT_BLKDEV, + [DT_FIFO] = FT_FIFO, + [DT_SOCK] = FT_SOCK, +}; + +/** + * fs_umode_to_ftype() - file mode to on-disk file type. + * @mode: The file mode to convert. + * + * This function converts the file mode value to the on-disk file type (FT_*). + * + * Context: Any context. + * Return: + * * FT_UNKNOWN - Unknown type + * * FT_REG_FILE - Regular file + * * FT_DIR - Directory + * * FT_CHRDEV - Character device + * * FT_BLKDEV - Block device + * * FT_FIFO - FIFO + * * FT_SOCK - Local-domain socket + * * FT_SYMLINK - Symbolic link + */ +unsigned char fs_umode_to_ftype(umode_t mode) +{ + return fs_ftype_by_dtype[S_DT(mode)]; +} +EXPORT_SYMBOL_GPL(fs_umode_to_ftype); + +/** + * fs_umode_to_dtype() - file mode to dirent file type. + * @mode: The file mode to convert. + * + * This function converts the file mode value to the directory + * entry type (DT_*). + * + * Context: Any context. + * Return: + * * DT_UNKNOWN - Unknown type + * * DT_FIFO - FIFO + * * DT_CHR - Character device + * * DT_DIR - Directory + * * DT_BLK - Block device + * * DT_REG - Regular file + * * DT_LNK - Symbolic link + * * DT_SOCK - Local-domain socket + */ +unsigned char fs_umode_to_dtype(umode_t mode) +{ + return fs_ftype_to_dtype(fs_umode_to_ftype(mode)); +} +EXPORT_SYMBOL_GPL(fs_umode_to_dtype); diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..92966678539d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1699,22 +1700,6 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, u64 phys, u64 len, u32 flags); int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); -/* - * File types - * - * NOTE! These match bits 12..15 of stat.st_mode - * (ie "(i_mode >> 12) & 15"). - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. diff --git a/include/linux/fs_types.h b/include/linux/fs_types.h new file mode 100644 index 000000000000..54816791196f --- /dev/null +++ b/include/linux/fs_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FS_TYPES_H +#define _LINUX_FS_TYPES_H + +/* + * This is a header for the common implementation of dirent + * to fs on-disk file type conversion. Although the fs on-disk + * bits are specific to every file system, in practice, many + * file systems use the exact same on-disk format to describe + * the lower 3 file type bits that represent the 7 POSIX file + * types. + * + * It is important to note that the definitions in this + * header MUST NOT change. This would break both the + * userspace ABI and the on-disk format of filesystems + * using this code. + * + * All those file systems can use this generic code for the + * conversions. + */ + +/* + * struct dirent file types + * exposed to user via getdents(2), readdir(3) + * + * These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). + */ +#define S_DT_SHIFT 12 +#define S_DT(mode) (((mode) & S_IFMT) >> S_DT_SHIFT) +#define S_DT_MASK (S_IFMT >> S_DT_SHIFT) + +/* these are defined by POSIX and also present in glibc's dirent.h */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +#define DT_MAX (S_DT_MASK + 1) /* 16 */ + +/* + * fs on-disk file types. + * Only the low 3 bits are used for the POSIX file types. + * Other bits are reserved for fs private use. + * These definitions are shared and used by multiple filesystems, + * and MUST NOT change under any circumstances. + * + * Note that no fs currently stores the whiteout type on-disk, + * so whiteout dirents are exposed to user as DT_CHR. + */ +#define FT_UNKNOWN 0 +#define FT_REG_FILE 1 +#define FT_DIR 2 +#define FT_CHRDEV 3 +#define FT_BLKDEV 4 +#define FT_FIFO 5 +#define FT_SOCK 6 +#define FT_SYMLINK 7 + +#define FT_MAX 8 + +/* + * declarations for helper functions, accompanying implementation + * is in fs/fs_types.c + */ +extern unsigned char fs_ftype_to_dtype(unsigned int filetype); +extern unsigned char fs_umode_to_ftype(umode_t mode); +extern unsigned char fs_umode_to_dtype(umode_t mode); + +#endif -- cgit v1.2.3 From dc60a4cfb77c891f67f31953025208067b05883c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 17 Jan 2019 11:47:55 -0200 Subject: media: soc_camera_platform: remove obsolete soc_camera test driver This is a test stub driver for soc_camera. Since soc_camera is being deprecated (and in fact, nobody is using it anymore) there's no sense in keeping this test driver. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/soc_camera/Kconfig | 6 - drivers/media/platform/soc_camera/Makefile | 4 - .../platform/soc_camera/soc_camera_platform.c | 188 --------------------- .../platform_data/media/soc_camera_platform.h | 83 --------- 4 files changed, 281 deletions(-) delete mode 100644 drivers/media/platform/soc_camera/soc_camera_platform.c delete mode 100644 include/linux/platform_data/media/soc_camera_platform.h (limited to 'include/linux') diff --git a/drivers/media/platform/soc_camera/Kconfig b/drivers/media/platform/soc_camera/Kconfig index d471d34b884c..8f9b3bac5450 100644 --- a/drivers/media/platform/soc_camera/Kconfig +++ b/drivers/media/platform/soc_camera/Kconfig @@ -6,9 +6,3 @@ config SOC_CAMERA SoC Camera is a common API to several cameras, not connecting over a bus like PCI or USB. For example some i2c camera connected directly to the data bus of an SoC. - -config SOC_CAMERA_PLATFORM - tristate "platform camera support" - depends on SOC_CAMERA - help - This is a generic SoC camera platform driver, useful for testing diff --git a/drivers/media/platform/soc_camera/Makefile b/drivers/media/platform/soc_camera/Makefile index 2cb7022e073b..85d5e74f3b2b 100644 --- a/drivers/media/platform/soc_camera/Makefile +++ b/drivers/media/platform/soc_camera/Makefile @@ -1,5 +1 @@ obj-$(CONFIG_SOC_CAMERA) += soc_camera.o soc_mediabus.o - -# a platform subdevice driver stub, allowing to support cameras by adding a -# couple of callback functions to the board code -obj-$(CONFIG_SOC_CAMERA_PLATFORM) += soc_camera_platform.o diff --git a/drivers/media/platform/soc_camera/soc_camera_platform.c b/drivers/media/platform/soc_camera/soc_camera_platform.c deleted file mode 100644 index 79fbe1fea95f..000000000000 --- a/drivers/media/platform/soc_camera/soc_camera_platform.c +++ /dev/null @@ -1,188 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Generic Platform Camera Driver - * - * Copyright (C) 2008 Magnus Damm - * Based on mt9m001 driver, - * Copyright (C) 2008, Guennadi Liakhovetski - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct soc_camera_platform_priv { - struct v4l2_subdev subdev; -}; - -static struct soc_camera_platform_priv *get_priv(struct platform_device *pdev) -{ - struct v4l2_subdev *subdev = platform_get_drvdata(pdev); - return container_of(subdev, struct soc_camera_platform_priv, subdev); -} - -static int soc_camera_platform_s_stream(struct v4l2_subdev *sd, int enable) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - return p->set_capture(p, enable); -} - -static int soc_camera_platform_fill_fmt(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_format *format) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *mf = &format->format; - - mf->width = p->format.width; - mf->height = p->format.height; - mf->code = p->format.code; - mf->colorspace = p->format.colorspace; - mf->field = p->format.field; - - return 0; -} - -static int soc_camera_platform_s_power(struct v4l2_subdev *sd, int on) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - return soc_camera_set_power(p->icd->control, &p->icd->sdesc->subdev_desc, NULL, on); -} - -static const struct v4l2_subdev_core_ops platform_subdev_core_ops = { - .s_power = soc_camera_platform_s_power, -}; - -static int soc_camera_platform_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_mbus_code_enum *code) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - if (code->pad || code->index) - return -EINVAL; - - code->code = p->format.code; - return 0; -} - -static int soc_camera_platform_get_selection(struct v4l2_subdev *sd, - struct v4l2_subdev_pad_config *cfg, - struct v4l2_subdev_selection *sel) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE) - return -EINVAL; - - switch (sel->target) { - case V4L2_SEL_TGT_CROP_BOUNDS: - case V4L2_SEL_TGT_CROP_DEFAULT: - case V4L2_SEL_TGT_CROP: - sel->r.left = 0; - sel->r.top = 0; - sel->r.width = p->format.width; - sel->r.height = p->format.height; - return 0; - default: - return -EINVAL; - } -} - -static int soc_camera_platform_g_mbus_config(struct v4l2_subdev *sd, - struct v4l2_mbus_config *cfg) -{ - struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd); - - cfg->flags = p->mbus_param; - cfg->type = p->mbus_type; - - return 0; -} - -static const struct v4l2_subdev_video_ops platform_subdev_video_ops = { - .s_stream = soc_camera_platform_s_stream, - .g_mbus_config = soc_camera_platform_g_mbus_config, -}; - -static const struct v4l2_subdev_pad_ops platform_subdev_pad_ops = { - .enum_mbus_code = soc_camera_platform_enum_mbus_code, - .get_selection = soc_camera_platform_get_selection, - .get_fmt = soc_camera_platform_fill_fmt, - .set_fmt = soc_camera_platform_fill_fmt, -}; - -static const struct v4l2_subdev_ops platform_subdev_ops = { - .core = &platform_subdev_core_ops, - .video = &platform_subdev_video_ops, - .pad = &platform_subdev_pad_ops, -}; - -static int soc_camera_platform_probe(struct platform_device *pdev) -{ - struct soc_camera_host *ici; - struct soc_camera_platform_priv *priv; - struct soc_camera_platform_info *p = pdev->dev.platform_data; - struct soc_camera_device *icd; - - if (!p) - return -EINVAL; - - if (!p->icd) { - dev_err(&pdev->dev, - "Platform has not set soc_camera_device pointer!\n"); - return -EINVAL; - } - - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - icd = p->icd; - - /* soc-camera convention: control's drvdata points to the subdev */ - platform_set_drvdata(pdev, &priv->subdev); - /* Set the control device reference */ - icd->control = &pdev->dev; - - ici = to_soc_camera_host(icd->parent); - - v4l2_subdev_init(&priv->subdev, &platform_subdev_ops); - v4l2_set_subdevdata(&priv->subdev, p); - strscpy(priv->subdev.name, dev_name(&pdev->dev), - sizeof(priv->subdev.name)); - - return v4l2_device_register_subdev(&ici->v4l2_dev, &priv->subdev); -} - -static int soc_camera_platform_remove(struct platform_device *pdev) -{ - struct soc_camera_platform_priv *priv = get_priv(pdev); - struct soc_camera_platform_info *p = v4l2_get_subdevdata(&priv->subdev); - - p->icd->control = NULL; - v4l2_device_unregister_subdev(&priv->subdev); - return 0; -} - -static struct platform_driver soc_camera_platform_driver = { - .driver = { - .name = "soc_camera_platform", - }, - .probe = soc_camera_platform_probe, - .remove = soc_camera_platform_remove, -}; - -module_platform_driver(soc_camera_platform_driver); - -MODULE_DESCRIPTION("SoC Camera Platform driver"); -MODULE_AUTHOR("Magnus Damm"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:soc_camera_platform"); diff --git a/include/linux/platform_data/media/soc_camera_platform.h b/include/linux/platform_data/media/soc_camera_platform.h deleted file mode 100644 index 1e5065dab430..000000000000 --- a/include/linux/platform_data/media/soc_camera_platform.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Generic Platform Camera Driver Header - * - * Copyright (C) 2008 Magnus Damm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __SOC_CAMERA_H__ -#define __SOC_CAMERA_H__ - -#include -#include -#include - -struct device; - -struct soc_camera_platform_info { - const char *format_name; - unsigned long format_depth; - struct v4l2_mbus_framefmt format; - unsigned long mbus_param; - enum v4l2_mbus_type mbus_type; - struct soc_camera_device *icd; - int (*set_capture)(struct soc_camera_platform_info *info, int enable); -}; - -static inline void soc_camera_platform_release(struct platform_device **pdev) -{ - *pdev = NULL; -} - -static inline int soc_camera_platform_add(struct soc_camera_device *icd, - struct platform_device **pdev, - struct soc_camera_link *plink, - void (*release)(struct device *dev), - int id) -{ - struct soc_camera_subdev_desc *ssdd = - (struct soc_camera_subdev_desc *)plink; - struct soc_camera_platform_info *info = ssdd->drv_priv; - int ret; - - if (&icd->sdesc->subdev_desc != ssdd) - return -ENODEV; - - if (*pdev) - return -EBUSY; - - *pdev = platform_device_alloc("soc_camera_platform", id); - if (!*pdev) - return -ENOMEM; - - info->icd = icd; - - (*pdev)->dev.platform_data = info; - (*pdev)->dev.release = release; - - ret = platform_device_add(*pdev); - if (ret < 0) { - platform_device_put(*pdev); - *pdev = NULL; - info->icd = NULL; - } - - return ret; -} - -static inline void soc_camera_platform_del(const struct soc_camera_device *icd, - struct platform_device *pdev, - const struct soc_camera_link *plink) -{ - const struct soc_camera_subdev_desc *ssdd = - (const struct soc_camera_subdev_desc *)plink; - if (&icd->sdesc->subdev_desc != ssdd || !pdev) - return; - - platform_device_unregister(pdev); -} - -#endif /* __SOC_CAMERA_H__ */ -- cgit v1.2.3 From 1fc1b63638da1accb27264a507b23aa6863c3852 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 19 Jan 2019 16:04:12 +0100 Subject: spi: spi-mem: Add devm_spi_mem_dirmap_{create,destroy}() Since direct mapping descriptors usually the same lifetime as the SPI MEM device adding devm_ variants of the spi_mem_dirmap_{create,destroy}() should greatly simplify error/remove path of spi-mem drivers making use of the direct mapping API. Signed-off-by: Boris Brezillon Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 69 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi-mem.h | 5 ++++ 2 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 5217a5628be2..08e326a124cc 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -551,6 +551,75 @@ void spi_mem_dirmap_destroy(struct spi_mem_dirmap_desc *desc) } EXPORT_SYMBOL_GPL(spi_mem_dirmap_destroy); +static void devm_spi_mem_dirmap_release(struct device *dev, void *res) +{ + struct spi_mem_dirmap_desc *desc = *(struct spi_mem_dirmap_desc **)res; + + spi_mem_dirmap_destroy(desc); +} + +/** + * devm_spi_mem_dirmap_create() - Create a direct mapping descriptor and attach + * it to a device + * @dev: device the dirmap desc will be attached to + * @mem: SPI mem device this direct mapping should be created for + * @info: direct mapping information + * + * devm_ variant of the spi_mem_dirmap_create() function. See + * spi_mem_dirmap_create() for more details. + * + * Return: a valid pointer in case of success, and ERR_PTR() otherwise. + */ +struct spi_mem_dirmap_desc * +devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem, + const struct spi_mem_dirmap_info *info) +{ + struct spi_mem_dirmap_desc **ptr, *desc; + + ptr = devres_alloc(devm_spi_mem_dirmap_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + desc = spi_mem_dirmap_create(mem, info); + if (IS_ERR(desc)) { + devres_free(ptr); + } else { + *ptr = desc; + devres_add(dev, ptr); + } + + return desc; +} +EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_create); + +static int devm_spi_mem_dirmap_match(struct device *dev, void *res, void *data) +{ + struct spi_mem_dirmap_desc **ptr = res; + + if (WARN_ON(!ptr || !*ptr)) + return 0; + + return *ptr == data; +} + +/** + * devm_spi_mem_dirmap_destroy() - Destroy a direct mapping descriptor attached + * to a device + * @dev: device the dirmap desc is attached to + * @desc: the direct mapping descriptor to destroy + * + * devm_ variant of the spi_mem_dirmap_destroy() function. See + * spi_mem_dirmap_destroy() for more details. + */ +void devm_spi_mem_dirmap_destroy(struct device *dev, + struct spi_mem_dirmap_desc *desc) +{ + devres_release(dev, devm_spi_mem_dirmap_release, + devm_spi_mem_dirmap_match, desc); +} +EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_destroy); + /** * spi_mem_dirmap_dirmap_read() - Read data through a direct mapping * @desc: direct mapping descriptor diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 3fe24500c5ee..3703d0dcac2e 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -330,6 +330,11 @@ ssize_t spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs, size_t len, void *buf); ssize_t spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc, u64 offs, size_t len, const void *buf); +struct spi_mem_dirmap_desc * +devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem, + const struct spi_mem_dirmap_info *info); +void devm_spi_mem_dirmap_destroy(struct device *dev, + struct spi_mem_dirmap_desc *desc); int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv, struct module *owner); -- cgit v1.2.3 From cf5c6c211b7e9eb4f4219f83671432c9ef257187 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Jan 2019 15:25:04 +0800 Subject: perf: Remove duplicated workqueue.h include from perf_event.h It is already included a little bit higher up in that file. Signed-off-by: YueHaibing Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190117072504.14428-1-yuehaibing@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cec02dc63b51..f8ec36197718 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,7 +53,6 @@ struct perf_guest_info_callbacks { #include #include #include -#include #include #include -- cgit v1.2.3 From 5620196951192f7cd2da0a04e7c0113f40bfc14e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Jan 2019 13:20:20 -0300 Subject: perf: Make perf_event_output() propagate the output() return For the original mode of operation it isn't needed, since we report back errors via PERF_RECORD_LOST records in the ring buffer, but for use in bpf_perf_event_output() it is convenient to return the errors, basically -ENOSPC. Currently bpf_perf_event_output() returns an error indication, the last thing it does, which is to push it to the ring buffer is that can fail and if so, this failure won't be reported back to its users, fix it. Reported-by: Jamal Hadi Salim Tested-by: Jamal Hadi Salim Acked-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/r/20190118150938.GN5823@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 6 +++--- kernel/events/core.c | 11 +++++++---- kernel/trace/bpf_trace.c | 3 +-- tools/perf/examples/bpf/augmented_raw_syscalls.c | 4 ++-- tools/perf/examples/bpf/augmented_syscalls.c | 14 +++++++------- tools/perf/examples/bpf/etcsnoop.c | 10 +++++----- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f8ec36197718..4eb88065a9b5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) diff --git a/kernel/events/core.c b/kernel/events/core.c index fbe59b793b36..bc525cd1615c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..088c2032ceaf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..9e9d4c66e53c 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..b7dba114e36c 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..550e69c2e8d1 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { -- cgit v1.2.3 From 76193a94522f1d4edf2447a536f3f796ce56343b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:13 -0800 Subject: perf, bpf: Introduce PERF_RECORD_KSYMBOL For better performance analysis of dynamically JITed and loaded kernel functions, such as BPF programs, this patch introduces PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol register/unregister information to user space. The following data structure is used for PERF_RECORD_KSYMBOL. /* * struct { * struct perf_event_header header; * u64 addr; * u32 len; * u16 ksym_type; * u16 flags; * char name[]; * struct sample_id sample_id; * }; */ Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 8 ++++ include/uapi/linux/perf_event.h | 26 ++++++++++- kernel/events/core.c | 98 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 130 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4eb88065a9b5..136fe0495374 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1122,6 +1122,10 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, } extern void perf_event_mmap(struct vm_area_struct *vma); + +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, const char *sym); + extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1342,6 +1346,10 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } + +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, const char *sym) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ea19b5d491bf..1dee5c8f166b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -372,7 +372,8 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + __reserved_1 : 34; union { __u32 wakeup_events; /* wakeup every n events */ @@ -963,9 +964,32 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/events/core.c b/kernel/events/core.c index bc525cd1615c..e04ab5f325cf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; +static atomic_t nr_ksymbol_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || - attr->task || + attr->task || attr->ksymbol || attr->context_switch) return true; return false; @@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (has_branch_stack(event)) dec = true; + if (event->attr.ksymbol) + atomic_dec(&nr_ksymbol_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -7653,6 +7656,97 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +/* + * ksymbol register/unregister tracking + */ + +struct perf_ksymbol_event { + const char *name; + int name_len; + struct { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + } event_id; +}; + +static int perf_event_ksymbol_match(struct perf_event *event) +{ + return event->attr.ksymbol; +} + +static void perf_event_ksymbol_output(struct perf_event *event, void *data) +{ + struct perf_ksymbol_event *ksymbol_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_ksymbol_match(event)) + return; + + perf_event_header__init_id(&ksymbol_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + ksymbol_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, ksymbol_event->event_id); + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, + const char *sym) +{ + struct perf_ksymbol_event ksymbol_event; + char name[KSYM_NAME_LEN]; + u16 flags = 0; + int name_len; + + if (!atomic_read(&nr_ksymbol_events)) + return; + + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) + goto err; + + strlcpy(name, sym, KSYM_NAME_LEN); + name_len = strlen(name) + 1; + while (!IS_ALIGNED(name_len, sizeof(u64))) + name[name_len++] = '\0'; + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); + + if (unregister) + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; + + ksymbol_event = (struct perf_ksymbol_event){ + .name = name, + .name_len = name_len, + .event_id = { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = sizeof(ksymbol_event.event_id) + + name_len, + }, + .addr = addr, + .len = len, + .ksym_type = ksym_type, + .flags = flags, + }, + }; + + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); + return; +err: + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -9912,6 +10006,8 @@ static void account_event(struct perf_event *event) inc = true; if (is_cgroup_event(event)) inc = true; + if (event->attr.ksymbol) + atomic_inc(&nr_ksymbol_events); if (inc) { /* -- cgit v1.2.3 From 6ee52e2a3fe4ea35520720736e6791df1fb67106 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Jan 2019 08:15:15 -0800 Subject: perf, bpf: Introduce PERF_RECORD_BPF_EVENT For better performance analysis of BPF programs, this patch introduces PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program load/unload information to user space. Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs. The following example shows kernel symbols for a BPF program with 7 sub programs: ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not needed for simple profiling. For annotation, user space need to listen to PERF_RECORD_BPF_EVENT and gather more information about these (sub) programs via sys_bpf. Signed-off-by: Song Liu Reviewed-by: Arnaldo Carvalho de Melo Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Tested-by: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/20190117161521.1341602-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/filter.h | 7 +++ include/linux/perf_event.h | 6 +++ include/uapi/linux/perf_event.h | 29 +++++++++- kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 2 + kernel/events/core.c | 115 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..d531d4250bff 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -951,6 +951,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1006,6 +1007,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } + +static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + sym[0] = '\0'; +} + #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 136fe0495374..a79e59fc3b7d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1125,6 +1125,9 @@ extern void perf_event_mmap(struct vm_area_struct *vma); extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym); +extern void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1350,6 +1353,9 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym) { } +static inline void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1dee5c8f166b..7198ddd0c6b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -373,7 +373,8 @@ struct perf_event_attr { write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - __reserved_1 : 34; + bpf_event : 1, /* include bpf events */ + __reserved_1 : 33; union { __u32 wakeup_events; /* wakeup every n events */ @@ -979,6 +980,25 @@ enum perf_event_type { */ PERF_RECORD_KSYMBOL = 17, + /* + * Record bpf events: + * enum perf_bpf_event_type { + * PERF_BPF_EVENT_UNKNOWN = 0, + * PERF_BPF_EVENT_PROG_LOAD = 1, + * PERF_BPF_EVENT_PROG_UNLOAD = 2, + * }; + * + * struct { + * struct perf_event_header header; + * u16 type; + * u16 flags; + * u32 id; + * u8 tag[BPF_TAG_SIZE]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_BPF_EVENT = 18, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -990,6 +1010,13 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) +enum perf_bpf_event_type { + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ +}; + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..19c49313c709 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -495,7 +495,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, *symbol_end = addr + hdr->pages * PAGE_SIZE; } -static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; const struct btf_type *type; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..30ebd085790b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1211,6 +1211,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); @@ -1554,6 +1555,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) } bpf_prog_kallsyms_add(prog); + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); return err; free_used_maps: diff --git a/kernel/events/core.c b/kernel/events/core.c index e04ab5f325cf..236bb8ddb7bc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -386,6 +386,7 @@ static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly; +static atomic_t nr_bpf_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4308,6 +4309,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (event->attr.ksymbol) atomic_dec(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_dec(&nr_bpf_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -7747,6 +7750,116 @@ err: WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); } +/* + * bpf program load/unload tracking + */ + +struct perf_bpf_event { + struct bpf_prog *prog; + struct { + struct perf_event_header header; + u16 type; + u16 flags; + u32 id; + u8 tag[BPF_TAG_SIZE]; + } event_id; +}; + +static int perf_event_bpf_match(struct perf_event *event) +{ + return event->attr.bpf_event; +} + +static void perf_event_bpf_output(struct perf_event *event, void *data) +{ + struct perf_bpf_event *bpf_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_bpf_match(event)) + return; + + perf_event_header__init_id(&bpf_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + bpf_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, bpf_event->event_id); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, + enum perf_bpf_event_type type) +{ + bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; + char sym[KSYM_NAME_LEN]; + int i; + + if (prog->aux->func_cnt == 0) { + bpf_get_prog_name(prog, sym); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)prog->bpf_func, + prog->jited_len, unregister, sym); + } else { + for (i = 0; i < prog->aux->func_cnt; i++) { + struct bpf_prog *subprog = prog->aux->func[i]; + + bpf_get_prog_name(subprog, sym); + perf_event_ksymbol( + PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)subprog->bpf_func, + subprog->jited_len, unregister, sym); + } + } +} + +void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) +{ + struct perf_bpf_event bpf_event; + + if (type <= PERF_BPF_EVENT_UNKNOWN || + type >= PERF_BPF_EVENT_MAX) + return; + + switch (type) { + case PERF_BPF_EVENT_PROG_LOAD: + case PERF_BPF_EVENT_PROG_UNLOAD: + if (atomic_read(&nr_ksymbol_events)) + perf_event_bpf_emit_ksymbols(prog, type); + break; + default: + break; + } + + if (!atomic_read(&nr_bpf_events)) + return; + + bpf_event = (struct perf_bpf_event){ + .prog = prog, + .event_id = { + .header = { + .type = PERF_RECORD_BPF_EVENT, + .size = sizeof(bpf_event.event_id), + }, + .type = type, + .flags = flags, + .id = prog->aux->id, + }, + }; + + BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64)); + + memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE); + perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -10008,6 +10121,8 @@ static void account_event(struct perf_event *event) inc = true; if (event->attr.ksymbol) atomic_inc(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_inc(&nr_bpf_events); if (inc) { /* -- cgit v1.2.3 From 534fd7aac56a7994d16032f32123def9923e339f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 13 Jan 2019 16:01:17 +0200 Subject: IB/mlx5: Manage indirection mkey upon DEVX flow for ODP Manage indirection mkey upon DEVX flow to support ODP. To support a page fault event on the indirection mkey it needs to be part of the device mkey radix tree. Both the creation and the deletion flows for a DEVX object which is indirection mkey were adapted to handle that. Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 89 +++++++++++++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++ include/linux/mlx5/driver.h | 1 + 4 files changed, 96 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index b7ff2138ac2a..bbf9a26d8fa6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -17,12 +17,18 @@ #define UVERBS_MODULE_NAME mlx5_ib #include +enum devx_obj_flags { + DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + struct mlx5_ib_devx_mr devx_mr; }; struct devx_umem { @@ -1011,6 +1017,36 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, } } +static int devx_handle_mkey_indirect(struct devx_obj *obj, + struct mlx5_ib_dev *dev, + void *in, void *out) +{ + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; + unsigned long flags; + struct mlx5_core_mkey *mkey; + void *mkc; + u8 key; + int err; + + mkey = &devx_mr->mmkey; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + key = MLX5_GET(mkc, mkc, mkey_7_0); + mkey->key = mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, out, mkey_index)) | key; + mkey->type = MLX5_MKEY_INDIRECT_DEVX; + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->pd = MLX5_GET(mkc, mkc, pd); + devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), + mkey); + write_unlock_irqrestore(&table->lock, flags); + return err; +} + static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, struct devx_obj *obj, void *in, int in_len) @@ -1030,13 +1066,45 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS || - access_mode == MLX5_MKC_ACCESS_MODE_KSM) + access_mode == MLX5_MKC_ACCESS_MODE_KSM) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY; return 0; + } MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); return 0; } +static void devx_free_indirect_mkey(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); +} + +/* This function to delete from the radix tree needs to be called before + * destroying the underlying mkey. Otherwise a race might occur in case that + * other thread will get the same mkey before this one will be deleted, + * in that case it will fail via inserting to the tree its own data. + * + * Note: + * An error in the destroy is not expected unless there is some other indirect + * mkey which points to this one. In a kernel cleanup flow it will be just + * destroyed in the iterative destruction call. In a user flow, in case + * the application didn't close in the expected order it's its own problem, + * the mkey won't be part of the tree, in both cases the kernel is safe. + */ +static void devx_cleanup_mkey(struct devx_obj *obj) +{ + struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; + struct mlx5_core_mkey *del_mkey; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + del_mkey = radix_tree_delete(&table->tree, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + write_unlock_irqrestore(&table->lock, flags); +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1044,10 +1112,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, struct devx_obj *obj = uobject->object; int ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + + call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, + devx_free_indirect_mkey); + return ret; + } + kfree(obj); return ret; } @@ -1108,6 +1187,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) goto obj_destroy; @@ -1116,6 +1201,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return 0; obj_destroy: + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61064b7171fc..ae00f994673b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5724,6 +5724,7 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); drain_workqueue(dev->advise_mr_wq); destroy_workqueue(dev->advise_mr_wq); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0a37ca2a714..819207190343 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -602,6 +602,12 @@ struct mlx5_ib_mw { int ndescs; }; +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; + struct rcu_head rcu; +}; + struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b6f5839f129a..619d6fee96a1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx { enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, }; struct mlx5_core_mkey { -- cgit v1.2.3 From 1278cf66cf4b1c3d30e311200b50c45457c92baa Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: nvram: Replace nvram_* function exports with static functions Replace nvram_* functions with static functions in nvram.h. These will become wrappers for struct nvram_ops method calls. This patch effectively disables existing NVRAM functionality so as to allow the rest of the series to be bisected without build failures. That functionality is gradually re-implemented in subsequent patches. Replace the sole validate-checksum-and-read-byte sequence with a call to nvram_read() which will gain the same semantics in subsequent patches. Remove unused exports. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 39 +++------------------------------------ drivers/char/nvram.c | 27 +++++---------------------- drivers/scsi/atari_scsi.c | 8 +++++--- include/linux/nvram.h | 32 +++++++++++++++++++++++++------- 4 files changed, 38 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index a8c457e40b0b..1d767847ffa6 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -34,38 +34,17 @@ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -unsigned char nvram_read_byte(int i) -{ - unsigned long flags; - unsigned char c; - - spin_lock_irqsave(&rtc_lock, flags); - c = __nvram_read_byte(i); - spin_unlock_irqrestore(&rtc_lock, flags); - return c; -} -EXPORT_SYMBOL(nvram_read_byte); - /* This races nicely with trying to read with checksum checking */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -void nvram_write_byte(unsigned char c, int i) -{ - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - __nvram_write_byte(c, i); - spin_unlock_irqrestore(&rtc_lock, flags); -} - /* On Ataris, the checksum is over all bytes except the checksum bytes * themselves; these are at the very end. */ @@ -73,7 +52,7 @@ void nvram_write_byte(unsigned char c, int i) #define ATARI_CKS_RANGE_END 47 #define ATARI_CKS_LOC 48 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned char sum = 0; @@ -84,18 +63,6 @@ int __nvram_check_checksum(void) (__nvram_read_byte(ATARI_CKS_LOC + 1) == (sum & 0xff)); } -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); - static void __nvram_set_checksum(void) { int i; diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c660cff9faf4..c98775bfd896 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -74,13 +74,12 @@ static int nvram_open_mode; /* special open modes */ * periodic 11 min sync from kernel/time/ntp.c vs. this driver.) */ -unsigned char __nvram_read_byte(int i) +static unsigned char __nvram_read_byte(int i) { return CMOS_READ(NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_read_byte); -unsigned char nvram_read_byte(int i) +static unsigned char pc_nvram_read_byte(int i) { unsigned long flags; unsigned char c; @@ -90,16 +89,14 @@ unsigned char nvram_read_byte(int i) spin_unlock_irqrestore(&rtc_lock, flags); return c; } -EXPORT_SYMBOL(nvram_read_byte); /* This races nicely with trying to read with checksum checking (nvram_read) */ -void __nvram_write_byte(unsigned char c, int i) +static void __nvram_write_byte(unsigned char c, int i) { CMOS_WRITE(c, NVRAM_FIRST_BYTE + i); } -EXPORT_SYMBOL(__nvram_write_byte); -void nvram_write_byte(unsigned char c, int i) +static void pc_nvram_write_byte(unsigned char c, int i) { unsigned long flags; @@ -107,14 +104,13 @@ void nvram_write_byte(unsigned char c, int i) __nvram_write_byte(c, i); spin_unlock_irqrestore(&rtc_lock, flags); } -EXPORT_SYMBOL(nvram_write_byte); /* On PCs, the checksum is built only over bytes 2..31 */ #define PC_CKS_RANGE_START 2 #define PC_CKS_RANGE_END 31 #define PC_CKS_LOC 32 -int __nvram_check_checksum(void) +static int __nvram_check_checksum(void) { int i; unsigned short sum = 0; @@ -126,19 +122,6 @@ int __nvram_check_checksum(void) __nvram_read_byte(PC_CKS_LOC+1); return (sum & 0xffff) == expect; } -EXPORT_SYMBOL(__nvram_check_checksum); - -int nvram_check_checksum(void) -{ - unsigned long flags; - int rv; - - spin_lock_irqsave(&rtc_lock, flags); - rv = __nvram_check_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); - return rv; -} -EXPORT_SYMBOL(nvram_check_checksum); static void __nvram_set_checksum(void) { diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 78b43200c99e..e809493d0d06 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -759,13 +759,15 @@ static int __init atari_scsi_probe(struct platform_device *pdev) atari_scsi_template.this_id = setup_hostid & 7; } else if (IS_REACHABLE(CONFIG_NVRAM)) { /* Test if a host id is set in the NVRam */ - if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) { - unsigned char b = nvram_read_byte(16); + if (ATARIHW_PRESENT(TT_CLK)) { + unsigned char b; + loff_t offset = 16; + ssize_t count = nvram_read(&b, 1, &offset); /* Arbitration enabled? (for TOS) * If yes, use configured host ID */ - if (b & 0x80) + if ((count == 1) && (b & 0x80)) atari_scsi_template.this_id = b & 7; } } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 28bfb9ab94ca..eb5b52a9a747 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -2,13 +2,31 @@ #ifndef _LINUX_NVRAM_H #define _LINUX_NVRAM_H +#include #include -/* __foo is foo without grabbing the rtc_lock - get it yourself */ -extern unsigned char __nvram_read_byte(int i); -extern unsigned char nvram_read_byte(int i); -extern void __nvram_write_byte(unsigned char c, int i); -extern void nvram_write_byte(unsigned char c, int i); -extern int __nvram_check_checksum(void); -extern int nvram_check_checksum(void); +static inline ssize_t nvram_get_size(void) +{ + return -ENODEV; +} + +static inline unsigned char nvram_read_byte(int addr) +{ + return 0xFF; +} + +static inline void nvram_write_byte(unsigned char val, int addr) +{ +} + +static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif /* _LINUX_NVRAM_H */ -- cgit v1.2.3 From a084dbf6592c22468eb946014b2e731fb42da7a9 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: m68k/atari: Implement arch_nvram_ops struct By implementing an arch_nvram_ops struct, a platform can re-use the drivers/char/nvram.c module without needing any arch-specific code in that module. Atari does so here. Acked-by: Geert Uytterhoeven Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/m68k/atari/nvram.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 14 ++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index 1d767847ffa6..e75adebe6e7d 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -74,6 +74,55 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum, ATARI_CKS_LOC + 1); } +static ssize_t atari_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t atari_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read = atari_nvram_read, + .write = atari_nvram_write, + .get_size = atari_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); + #ifdef CONFIG_PROC_FS static struct { unsigned char val; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index eb5b52a9a747..a1e01dc89759 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,18 @@ #include #include +struct nvram_ops { + ssize_t (*get_size)(void); + ssize_t (*read)(char *, size_t, loff_t *); + ssize_t (*write)(char *, size_t, loff_t *); +}; + +extern const struct nvram_ops arch_nvram_ops; + static inline ssize_t nvram_get_size(void) { + if (arch_nvram_ops.get_size) + return arch_nvram_ops.get_size(); return -ENODEV; } @@ -21,11 +31,15 @@ static inline void nvram_write_byte(unsigned char val, int addr) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.read) + return arch_nvram_ops.read(buf, count, ppos); return -ENODEV; } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { + if (arch_nvram_ops.write) + return arch_nvram_ops.write(buf, count, ppos); return -ENODEV; } -- cgit v1.2.3 From a156c7ba669c65b55c7afcc3994e1199cc0cad47 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Replace nvram_* extern declarations with standard header Remove the nvram_read_byte() and nvram_write_byte() declarations in powerpc/include/asm/nvram.h and use the cross-platform static functions in linux/nvram.h instead. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/nvram.h | 6 ------ arch/powerpc/kernel/setup_32.c | 25 +------------------------ drivers/char/generic_nvram.c | 1 + drivers/video/fbdev/matrox/matroxfb_base.c | 2 +- include/linux/nvram.h | 3 +++ 5 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 09a518bb7c03..56a388da9c4f 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -98,10 +98,4 @@ extern int nvram_write_os_partition(struct nvram_os_partition *part, unsigned int err_type, unsigned int error_log_cnt); -/* Determine NVRAM size */ -extern ssize_t nvram_get_size(void); - -/* Normal access to NVRAM */ -extern unsigned char nvram_read_byte(int i); -extern void nvram_write_byte(unsigned char c, int i); #endif /* _ASM_POWERPC_NVRAM_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 947f904688b0..f5107796e2d7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -149,30 +150,6 @@ __setup("l3cr=", ppc_setup_l3cr); #ifdef CONFIG_GENERIC_NVRAM -/* Generic nvram hooks used by drivers/char/gen_nvram.c */ -unsigned char nvram_read_byte(int addr) -{ - if (ppc_md.nvram_read_val) - return ppc_md.nvram_read_val(addr); - return 0xff; -} -EXPORT_SYMBOL(nvram_read_byte); - -void nvram_write_byte(unsigned char val, int addr) -{ - if (ppc_md.nvram_write_val) - ppc_md.nvram_write_val(addr, val); -} -EXPORT_SYMBOL(nvram_write_byte); - -ssize_t nvram_get_size(void) -{ - if (ppc_md.nvram_size) - return ppc_md.nvram_size(); - return -1; -} -EXPORT_SYMBOL(nvram_get_size); - void nvram_sync(void) { if (ppc_md.nvram_sync) diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c index ff5394f47587..0c22b9503e84 100644 --- a/drivers/char/generic_nvram.c +++ b/drivers/char/generic_nvram.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 838869c6490c..0a4e5bad33f4 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -111,12 +111,12 @@ #include "matroxfb_g450.h" #include #include +#include #include #include #ifdef CONFIG_PPC_PMAC #include -unsigned char nvram_read_byte(int); static int default_vmode = VMODE_NVRAM; static int default_cmode = CMODE_NVRAM; #endif diff --git a/include/linux/nvram.h b/include/linux/nvram.h index a1e01dc89759..79431dab87a1 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -15,8 +15,11 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { +#ifdef CONFIG_PPC +#else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); +#endif return -ENODEV; } -- cgit v1.2.3 From d5bbb5021ce8d9ff561c7469f5b4589ccb3bc4a6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Adopt arch_nvram_ops NVRAMs on different platforms and architectures have different attributes and access methods. E.g. some platforms have byte-at-a-time accessor functions while others have byte-range accessor functions. Some have checksum functionality while others do not. By calling ops struct methods via the common wrapper functions, the nvram module and other drivers can make use of the available NVRAM functionality in a portable way. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 30 ++++++++++++++++++++++++------ include/linux/nvram.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c98775bfd896..2df391f78986 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -52,9 +52,11 @@ static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); static int nvram_open_cnt; /* #times opened */ static int nvram_open_mode; /* special open modes */ +static ssize_t nvram_size; #define NVRAM_WRITE 1 /* opened for writing (exclusive) */ #define NVRAM_EXCL 2 /* opened with O_EXCL */ +#ifdef CONFIG_X86 /* * These functions are provided to be called internally or by other parts of * the kernel. It's up to the caller to ensure correct checksum before reading @@ -145,6 +147,19 @@ void nvram_set_checksum(void) } #endif /* 0 */ +static ssize_t pc_nvram_get_size(void) +{ + return NVRAM_BYTES; +} + +const struct nvram_ops arch_nvram_ops = { + .read_byte = pc_nvram_read_byte, + .write_byte = pc_nvram_write_byte, + .get_size = pc_nvram_get_size, +}; +EXPORT_SYMBOL(arch_nvram_ops); +#endif /* CONFIG_X86 */ + /* * The are the file operation function for user access to /dev/nvram */ @@ -152,7 +167,7 @@ void nvram_set_checksum(void) static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) { return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE, - NVRAM_BYTES); + nvram_size); } static ssize_t nvram_misc_read(struct file *file, char __user *buf, @@ -303,8 +318,7 @@ static int nvram_misc_release(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_PROC_FS - +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) static const char * const floppy_types[] = { "none", "5.25'' 360k", "5.25'' 1.2M", "3.5'' 720k", "3.5'' 1.44M", "3.5'' 2.88M", "3.5'' 2.88M" @@ -394,7 +408,7 @@ static int nvram_proc_read(struct seq_file *seq, void *offset) return 0; } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_X86 && CONFIG_PROC_FS */ static const struct file_operations nvram_misc_fops = { .owner = THIS_MODULE, @@ -416,13 +430,17 @@ static int __init nvram_module_init(void) { int ret; + nvram_size = nvram_get_size(); + if (nvram_size < 0) + return nvram_size; + ret = misc_register(&nvram_misc); if (ret) { pr_err("nvram: can't misc_register on minor=%d\n", NVRAM_MINOR); return ret; } -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read)) { pr_err("nvram: can't create /proc/driver/nvram\n"); misc_deregister(&nvram_misc); @@ -436,7 +454,7 @@ static int __init nvram_module_init(void) static void __exit nvram_module_exit(void) { -#ifdef CONFIG_PROC_FS +#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS) remove_proc_entry("driver/nvram", NULL); #endif misc_deregister(&nvram_misc); diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 79431dab87a1..bb4ea8cc6ea6 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,8 +5,30 @@ #include #include +/** + * struct nvram_ops - NVRAM functionality made available to drivers + * @read: validate checksum (if any) then load a range of bytes from NVRAM + * @write: store a range of bytes to NVRAM then update checksum (if any) + * @read_byte: load a single byte from NVRAM + * @write_byte: store a single byte to NVRAM + * @get_size: return the fixed number of bytes in the NVRAM + * + * Architectures which provide an nvram ops struct need not implement all + * of these methods. If the NVRAM hardware can be accessed only one byte + * at a time then it may be sufficient to provide .read_byte and .write_byte. + * If the NVRAM has a checksum (and it is to be checked) the .read and + * .write methods can be used to implement that efficiently. + * + * Portable drivers may use the wrapper functions defined here. + * The nvram_read() and nvram_write() functions call the .read and .write + * methods when available and fall back on the .read_byte and .write_byte + * methods otherwise. + */ + struct nvram_ops { ssize_t (*get_size)(void); + unsigned char (*read_byte)(int); + void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); }; @@ -25,11 +47,21 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.read_byte) + return arch_nvram_ops.read_byte(addr); +#endif return 0xFF; } static inline void nvram_write_byte(unsigned char val, int addr) { +#ifdef CONFIG_PPC +#else + if (arch_nvram_ops.write_byte) + arch_nvram_ops.write_byte(val, addr); +#endif } static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) -- cgit v1.2.3 From 2d58636e0af724f38acad25246c1625efec36122 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Allow the set_checksum and initialize ioctls to be omitted The drivers/char/nvram.c module has previously supported only RTC "CMOS" NVRAM, for which it provides appropriate checksum ioctls. Make these ioctls optional so the module can be re-used with other kinds of NVRAM. The ops struct methods that implement the ioctls now return error codes so that a multi-platform kernel binary can do the right thing when running on hardware without a suitable NVRAM. Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 70 +++++++++++++++++++++++++++++---------------------- include/linux/nvram.h | 2 ++ 2 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 2df391f78986..f88ef41d0598 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -136,16 +136,25 @@ static void __nvram_set_checksum(void) __nvram_write_byte(sum & 0xff, PC_CKS_LOC + 1); } -#if 0 -void nvram_set_checksum(void) +static long pc_nvram_set_checksum(void) { - unsigned long flags; + spin_lock_irq(&rtc_lock); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + return 0; +} - spin_lock_irqsave(&rtc_lock, flags); +static long pc_nvram_initialize(void) +{ + ssize_t i; + + spin_lock_irq(&rtc_lock); + for (i = 0; i < NVRAM_BYTES; ++i) + __nvram_write_byte(0, i); __nvram_set_checksum(); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irq(&rtc_lock); + return 0; } -#endif /* 0 */ static ssize_t pc_nvram_get_size(void) { @@ -156,6 +165,8 @@ const struct nvram_ops arch_nvram_ops = { .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, + .set_checksum = pc_nvram_set_checksum, + .initialize = pc_nvram_initialize, }; EXPORT_SYMBOL(arch_nvram_ops); #endif /* CONFIG_X86 */ @@ -241,51 +252,50 @@ checksum_err: static long nvram_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int i; + long ret = -ENOTTY; switch (cmd) { - case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - - for (i = 0; i < NVRAM_BYTES; ++i) - __nvram_write_byte(0, i); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - + if (arch_nvram_ops.initialize != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.initialize(); + mutex_unlock(&nvram_mutex); + } + break; case NVRAM_SETCKS: /* just set checksum, contents unchanged (maybe useful after * checksum garbaged somehow...) */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&nvram_mutex); - spin_lock_irq(&rtc_lock); - __nvram_set_checksum(); - spin_unlock_irq(&rtc_lock); - mutex_unlock(&nvram_mutex); - return 0; - - default: - return -ENOTTY; + if (arch_nvram_ops.set_checksum != NULL) { + mutex_lock(&nvram_mutex); + ret = arch_nvram_ops.set_checksum(); + mutex_unlock(&nvram_mutex); + } + break; } + return ret; } static int nvram_misc_open(struct inode *inode, struct file *file) { spin_lock(&nvram_state_lock); + /* Prevent multiple readers/writers if desired. */ if ((nvram_open_cnt && (file->f_flags & O_EXCL)) || - (nvram_open_mode & NVRAM_EXCL) || - ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) { + (nvram_open_mode & NVRAM_EXCL)) { + spin_unlock(&nvram_state_lock); + return -EBUSY; + } + + /* Prevent multiple writers if the set_checksum ioctl is implemented. */ + if ((arch_nvram_ops.set_checksum != NULL) && + (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } diff --git a/include/linux/nvram.h b/include/linux/nvram.h index bb4ea8cc6ea6..31c763087746 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,6 +31,8 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); + long (*initialize)(void); + long (*set_checksum)(void); }; extern const struct nvram_ops arch_nvram_ops; -- cgit v1.2.3 From 109b3a89a7c48405d61a05d7a1720581a4f1574c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: char/nvram: Implement NVRAM read/write methods Refactor the RTC "CMOS" NVRAM functions so that they can be used as arch_nvram_ops methods. Checksumming logic is moved from the misc device operations to the nvram read/write operations. This makes the misc device implementation more generic. This preserves the locking mechanism such that "read if checksum valid" and "write and update checksum" remain atomic operations. Some platforms implement byte-range read/write methods which are similar to file_operations struct methods. Other platforms provide only byte-at-a-time methods. The former are more efficient but may be unavailable so fall back on the latter methods when necessary. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 120 +++++++++++++++++++++++++++++++------------------- include/linux/nvram.h | 32 +++++++++++++- 2 files changed, 104 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index f88ef41d0598..adcc213c331e 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -161,7 +162,46 @@ static ssize_t pc_nvram_get_size(void) return NVRAM_BYTES; } +static ssize_t pc_nvram_read(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + *p = __nvram_read_byte(i); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + +static ssize_t pc_nvram_write(char *buf, size_t count, loff_t *ppos) +{ + char *p = buf; + loff_t i; + + spin_lock_irq(&rtc_lock); + if (!__nvram_check_checksum()) { + spin_unlock_irq(&rtc_lock); + return -EIO; + } + for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p) + __nvram_write_byte(*p, i); + __nvram_set_checksum(); + spin_unlock_irq(&rtc_lock); + + *ppos = i; + return p - buf; +} + const struct nvram_ops arch_nvram_ops = { + .read = pc_nvram_read, + .write = pc_nvram_write, .read_byte = pc_nvram_read_byte, .write_byte = pc_nvram_write_byte, .get_size = pc_nvram_get_size, @@ -184,69 +224,57 @@ static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin) static ssize_t nvram_misc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - spin_lock_irq(&rtc_lock); + char *tmp; + ssize_t ret; - if (!__nvram_check_checksum()) - goto checksum_err; - for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp) - *tmp = __nvram_read_byte(i); + if (!access_ok(buf, count)) + return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_unlock_irq(&rtc_lock); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - if (copy_to_user(buf, contents, tmp - contents)) - return -EFAULT; + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; - *ppos = i; + ret = nvram_read(tmp, count, ppos); + if (ret <= 0) + goto out; - return tmp - contents; + if (copy_to_user(buf, tmp, ret)) { + *ppos -= ret; + ret = -EFAULT; + } -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; +out: + kfree(tmp); + return ret; } static ssize_t nvram_misc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned char contents[NVRAM_BYTES]; - unsigned i = *ppos; - unsigned char *tmp; - - if (i >= NVRAM_BYTES) - return 0; /* Past EOF */ - - if (count > NVRAM_BYTES - i) - count = NVRAM_BYTES - i; - if (count > NVRAM_BYTES) - return -EFAULT; /* Can't happen, but prove it to gcc */ + char *tmp; + ssize_t ret; - if (copy_from_user(contents, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; + if (*ppos >= nvram_size) + return 0; - spin_lock_irq(&rtc_lock); - - if (!__nvram_check_checksum()) - goto checksum_err; - - for (tmp = contents; count--; ++i, ++tmp) - __nvram_write_byte(*tmp, i); + count = min_t(size_t, count, nvram_size - *ppos); + count = min_t(size_t, count, PAGE_SIZE); - __nvram_set_checksum(); - - spin_unlock_irq(&rtc_lock); + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); - *ppos = i; - - return tmp - contents; - -checksum_err: - spin_unlock_irq(&rtc_lock); - return -EIO; + ret = nvram_write(tmp, count, ppos); + kfree(tmp); + return ret; } static long nvram_misc_ioctl(struct file *file, unsigned int cmd, diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 31c763087746..9df85703735c 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -66,18 +66,46 @@ static inline void nvram_write_byte(unsigned char val, int addr) #endif } +static inline ssize_t nvram_read_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + *p = nvram_read_byte(i); + *ppos = i; + return p - buf; +} + +static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) +{ + ssize_t nvram_size = nvram_get_size(); + loff_t i; + char *p = buf; + + if (nvram_size < 0) + return nvram_size; + for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count) + nvram_write_byte(*p, i); + *ppos = i; + return p - buf; +} + static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); - return -ENODEV; + return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); - return -ENODEV; + return nvram_write_bytes(buf, count, ppos); } #endif /* _LINUX_NVRAM_H */ -- cgit v1.2.3 From 95ac14b8a32817dcd1f13ae4787891484966d2d5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Implement nvram ioctls Add the powerpc-specific ioctls to the nvram module. This allows the nvram module to replace the generic_nvram module. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- drivers/char/nvram.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/nvram.h | 2 ++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index c9e295d73dc5..944f05fddacd 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -48,6 +48,9 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif static DEFINE_MUTEX(nvram_mutex); static DEFINE_SPINLOCK(nvram_state_lock); @@ -283,6 +286,38 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, long ret = -ENOTTY; switch (cmd) { +#ifdef CONFIG_PPC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + pr_warn("nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + /* fall through */ + case IOC_NVRAM_GET_OFFSET: + ret = -EINVAL; +#ifdef CONFIG_PPC_PMAC + if (machine_is(powermac)) { + int part, offset; + + if (copy_from_user(&part, (void __user *)arg, + sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (copy_to_user((void __user *)arg, + &offset, sizeof(offset)) != 0) + return -EFAULT; + ret = 0; + } +#endif + break; + case IOC_NVRAM_SYNC: + if (ppc_md.nvram_sync != NULL) { + mutex_lock(&nvram_mutex); + ppc_md.nvram_sync(); + mutex_unlock(&nvram_mutex); + } + ret = 0; + break; +#elif defined(CONFIG_X86) || defined(CONFIG_M68K) case NVRAM_INIT: /* initialize NVRAM contents and checksum */ if (!capable(CAP_SYS_ADMIN)) @@ -306,6 +341,7 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&nvram_mutex); } break; +#endif /* CONFIG_X86 || CONFIG_M68K */ } return ret; } @@ -321,12 +357,14 @@ static int nvram_misc_open(struct inode *inode, struct file *file) return -EBUSY; } +#if defined(CONFIG_X86) || defined(CONFIG_M68K) /* Prevent multiple writers if the set_checksum ioctl is implemented. */ if ((arch_nvram_ops.set_checksum != NULL) && (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) { spin_unlock(&nvram_state_lock); return -EBUSY; } +#endif if (file->f_flags & O_EXCL) nvram_open_mode |= NVRAM_EXCL; diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9df85703735c..9e3a957c8f1f 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -31,8 +31,10 @@ struct nvram_ops { void (*write_byte)(unsigned char, int); ssize_t (*read)(char *, size_t, loff_t *); ssize_t (*write)(char *, size_t, loff_t *); +#if defined(CONFIG_X86) || defined(CONFIG_M68K) long (*initialize)(void); long (*set_checksum)(void); +#endif }; extern const struct nvram_ops arch_nvram_ops; -- cgit v1.2.3 From f9c3a570f5fc584f2ca2dd222d1b8c8537fc55f6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 15 Jan 2019 15:18:56 +1100 Subject: powerpc: Enable HAVE_ARCH_NVRAM_OPS and disable GENERIC_NVRAM Switch PPC32 kernels from the generic_nvram module to the nvram module. Also fix a theoretical bug where CHRP omits the chrp_nvram_init() call when CONFIG_NVRAM_MODULE=m. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 6 +----- arch/powerpc/include/asm/nvram.h | 3 --- arch/powerpc/kernel/setup_32.c | 11 ----------- arch/powerpc/platforms/chrp/Makefile | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/setup.c | 3 +-- drivers/char/Kconfig | 19 +++++++++---------- include/linux/nvram.h | 20 ++++++++++++++++++++ 8 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2890d36eb531..f62e6a3f9c4e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -178,6 +178,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_NVRAM_OPS if PPC32 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 @@ -274,11 +275,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -# All PPC32s use generic nvram driver through ppc_md -config GENERIC_NVRAM - bool - default y if PPC32 - config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index 56a388da9c4f..629a5cdcc865 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -78,9 +78,6 @@ extern int pmac_get_partition(int partition); extern u8 pmac_xpram_read(int xpaddr); extern void pmac_xpram_write(int xpaddr, u8 data); -/* Synchronize NVRAM */ -extern void nvram_sync(void); - /* Initialize NVRAM OS partition */ extern int __init nvram_init_os_partition(struct nvram_os_partition *part); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index f5107796e2d7..c31082233a25 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -148,17 +148,6 @@ static int __init ppc_setup_l3cr(char *str) } __setup("l3cr=", ppc_setup_l3cr); -#ifdef CONFIG_GENERIC_NVRAM - -void nvram_sync(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} -EXPORT_SYMBOL(nvram_sync); - -#endif /* CONFIG_NVRAM */ - static int __init ppc_init(void) { /* clear the progress line */ diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile index 4b3bfadc70fa..dc3465cc8bc6 100644 --- a/arch/powerpc/platforms/chrp/Makefile +++ b/arch/powerpc/platforms/chrp/Makefile @@ -1,3 +1,3 @@ obj-y += setup.o time.o pegasos_eth.o pci.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_NVRAM) += nvram.o +obj-$(CONFIG_NVRAM:m=y) += nvram.o diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index e66644e0fb40..e8e804289c8e 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -550,7 +550,7 @@ static void __init chrp_init_IRQ(void) static void __init chrp_init2(void) { -#ifdef CONFIG_NVRAM +#if IS_ENABLED(CONFIG_NVRAM) chrp_nvram_init(); #endif diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 2e8221e20ee8..b47f49cf9c4d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -316,8 +316,7 @@ static void __init pmac_setup_arch(void) find_via_pmu(); smu_init(); -#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \ - defined(CONFIG_PPC64) +#if IS_ENABLED(CONFIG_NVRAM) || defined(CONFIG_PPC64) pmac_nvram_init(); #endif #ifdef CONFIG_PPC32 diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index ce9979529cf3..72866a004f07 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -244,25 +244,24 @@ source "drivers/char/hw_random/Kconfig" config NVRAM tristate "/dev/nvram support" - depends on X86 || GENERIC_NVRAM || HAVE_ARCH_NVRAM_OPS - default M68K + depends on X86 || HAVE_ARCH_NVRAM_OPS + default M68K || PPC ---help--- If you say Y here and create a character special file /dev/nvram with major number 10 and minor number 144 using mknod ("man mknod"), - you get read and write access to the extra bytes of non-volatile - memory in the real time clock (RTC), which is contained in every PC - and most Ataris. The actual number of bytes varies, depending on the - nvram in the system, but is usually 114 (128-14 for the RTC). - - This memory is conventionally called "CMOS RAM" on PCs and "NVRAM" - on Ataris. /dev/nvram may be used to view settings there, or to - change them (with some utility). It could also be used to frequently + you get read and write access to the non-volatile memory. + + /dev/nvram may be used to view settings in NVRAM or to change them + (with some utility). It could also be used to frequently save a few bits of very important data that may not be lost over power-off and for which writing to disk is too insecure. Note however that most NVRAM space in a PC belongs to the BIOS and you should NEVER idly tamper with it. See Ralf Brown's interrupt list for a guide to the use of CMOS bytes by your BIOS. + This memory is conventionally called "NVRAM" on PowerPC machines, + "CMOS RAM" on PCs, "NVRAM" on Ataris and "PRAM" on Macintoshes. + To compile this driver as a module, choose M here: the module will be called nvram. diff --git a/include/linux/nvram.h b/include/linux/nvram.h index 9e3a957c8f1f..d29d9c93a927 100644 --- a/include/linux/nvram.h +++ b/include/linux/nvram.h @@ -5,6 +5,10 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif + /** * struct nvram_ops - NVRAM functionality made available to drivers * @read: validate checksum (if any) then load a range of bytes from NVRAM @@ -42,6 +46,8 @@ extern const struct nvram_ops arch_nvram_ops; static inline ssize_t nvram_get_size(void) { #ifdef CONFIG_PPC + if (ppc_md.nvram_size) + return ppc_md.nvram_size(); #else if (arch_nvram_ops.get_size) return arch_nvram_ops.get_size(); @@ -52,6 +58,8 @@ static inline ssize_t nvram_get_size(void) static inline unsigned char nvram_read_byte(int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_read_val) + return ppc_md.nvram_read_val(addr); #else if (arch_nvram_ops.read_byte) return arch_nvram_ops.read_byte(addr); @@ -62,6 +70,8 @@ static inline unsigned char nvram_read_byte(int addr) static inline void nvram_write_byte(unsigned char val, int addr) { #ifdef CONFIG_PPC + if (ppc_md.nvram_write_val) + ppc_md.nvram_write_val(addr, val); #else if (arch_nvram_ops.write_byte) arch_nvram_ops.write_byte(val, addr); @@ -98,15 +108,25 @@ static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos) static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_read) + return ppc_md.nvram_read(buf, count, ppos); +#else if (arch_nvram_ops.read) return arch_nvram_ops.read(buf, count, ppos); +#endif return nvram_read_bytes(buf, count, ppos); } static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_PPC + if (ppc_md.nvram_write) + return ppc_md.nvram_write(buf, count, ppos); +#else if (arch_nvram_ops.write) return arch_nvram_ops.write(buf, count, ppos); +#endif return nvram_write_bytes(buf, count, ppos); } -- cgit v1.2.3 From 8092e79204e7884f4bee3584ecfe6cf4a124d129 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:37 -0800 Subject: ihex: Share code between ihex_validate_fw() and ihex_next_binrec() Convert both ihex_validate_fw() and ihex_next_binrec() to use a helper function to calculate next record offest. This way we only have one place implementing next record offset calculation logic. No functional change intended. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 75c194391869..9c701521176b 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -23,29 +23,34 @@ struct ihex_binrec { /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * -ihex_next_binrec(const struct ihex_binrec *rec) +__ihex_next_binrec(const struct ihex_binrec *rec) { int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; rec = (void *)&rec->data[next]; + return rec; +} + +static inline const struct ihex_binrec * +ihex_next_binrec(const struct ihex_binrec *rec) +{ + rec = __ihex_next_binrec(rec); + return be16_to_cpu(rec->len) ? rec : NULL; } /* Check that ihex_next_binrec() won't take us off the end of the image... */ static inline int ihex_validate_fw(const struct firmware *fw) { - const struct ihex_binrec *rec; - size_t ofs = 0; + const struct ihex_binrec *end, *rec; - while (ofs <= fw->size - sizeof(*rec)) { - rec = (void *)&fw->data[ofs]; + rec = (const void *)fw->data; + end = (const void *)&fw->data[fw->size - sizeof(*end)]; + for (; rec <= end; rec = __ihex_next_binrec(rec)) { /* Zero length marks end of records */ if (!be16_to_cpu(rec->len)) return 0; - - /* Point to next record... */ - ofs += (sizeof(*rec) + be16_to_cpu(rec->len) + 3) & ~3; } return -EINVAL; } -- cgit v1.2.3 From 5158c36ec9d0b3343f58987cec7ebfd866331fd0 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:38 -0800 Subject: ihex: Check if zero-length record is at the end of the blob When verifying the validity of IHEX file we need to make sure that zero-length record we found is located at the end of the file. Not doing that could result in an invalid file with a bogus zero-length in the middle short-circuiting the check and being reported as valid. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 9c701521176b..9130f307a420 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -49,7 +49,7 @@ static inline int ihex_validate_fw(const struct firmware *fw) for (; rec <= end; rec = __ihex_next_binrec(rec)) { /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) + if (rec == end && !be16_to_cpu(rec->len)) return 0; } return -EINVAL; -- cgit v1.2.3 From 9fb4ab4d3dd665a62da9c176a89e7c7feaf5d9e4 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Dec 2018 23:28:39 -0800 Subject: ihex: Simplify next record offset calculation Next record calucaltion can be reduced to a much more tivial ALIGN operation as follows: 1. Splitting 5 into 2 + 3 we get next = ((be16_to_cpu(rec->len) + 2 + 3) & ~3) - 2 (1) 2. Using ALIGN macro we reduce (1) to: ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2 (2) 3. Subsituting 'next' in original next record calucation we get: (void *)&rec->data[ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2] (3) 4. Converting array index to pointer arithmetic we convert (3) into: (void *)rec + sizeof(*rec) + ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2 (4) 5. Subsituting sizeof(*rec) with its value, 6, and substracting 2, in (4) we get: (void *)rec + ALIGN(be16_to_cpu(rec->len) + 2, 4) + 4 (5) 6. Since ALIGN(X, 4) + 4 == ALIGN(X + 4, 4), (5) can be converted to: (void *)rec + ALIGN(be16_to_cpu(rec->len) + 6, 4) (6) 5. Subsituting 6 in (6) to sizeof(*rec) we get: (void *)rec + ALIGN(be16_to_cpu(rec->len) + sizeof(*rec), 4) (7) Using expression (7) should make it more clear that next record is located by adding full size of the current record (payload + auxiliary data) aligned to 4 bytes, to the location of the current one. No functional change intended. Cc: Chris Healy Cc: Kyle McMartin Cc: Andrew Morton Cc: Masahiro Yamada Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: linux-kernel Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/ihex.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 9130f307a420..98cb5ce0b0a0 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -21,14 +21,18 @@ struct ihex_binrec { uint8_t data[0]; } __attribute__((packed)); +static inline uint16_t ihex_binrec_size(const struct ihex_binrec *p) +{ + return be16_to_cpu(p->len) + sizeof(*p); +} + /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * __ihex_next_binrec(const struct ihex_binrec *rec) { - int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; - rec = (void *)&rec->data[next]; + const void *p = rec; - return rec; + return p + ALIGN(ihex_binrec_size(rec), 4); } static inline const struct ihex_binrec * -- cgit v1.2.3 From 11f1ceca7031deefc1a34236ab7b94360016b71d Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:56 +0200 Subject: interconnect: Add generic on-chip interconnect API This patch introduces a new API to get requirements and configure the interconnect buses across the entire chipset to fit with the current demand. The API is using a consumer/provider-based model, where the providers are the interconnect buses and the consumers could be various drivers. The consumers request interconnect resources (path) between endpoints and set the desired constraints on this data flow path. The providers receive requests from consumers and aggregate these requests for all master-slave pairs on that path. Then the providers configure each node along the path to support a bandwidth that satisfies all bandwidth requests that cross through that node. The topology could be complicated and multi-tiered and is SoC specific. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- Documentation/interconnect/interconnect.rst | 94 +++++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/interconnect/Kconfig | 10 + drivers/interconnect/Makefile | 5 + drivers/interconnect/core.c | 567 ++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 125 ++++++ include/linux/interconnect.h | 52 +++ 8 files changed, 856 insertions(+) create mode 100644 Documentation/interconnect/interconnect.rst create mode 100644 drivers/interconnect/Kconfig create mode 100644 drivers/interconnect/Makefile create mode 100644 drivers/interconnect/core.c create mode 100644 include/linux/interconnect-provider.h create mode 100644 include/linux/interconnect.h (limited to 'include/linux') diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst new file mode 100644 index 000000000000..b8107dcc4cd3 --- /dev/null +++ b/Documentation/interconnect/interconnect.rst @@ -0,0 +1,94 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +GENERIC SYSTEM INTERCONNECT SUBSYSTEM +===================================== + +Introduction +------------ + +This framework is designed to provide a standard kernel interface to control +the settings of the interconnects on an SoC. These settings can be throughput, +latency and priority between multiple interconnected devices or functional +blocks. This can be controlled dynamically in order to save power or provide +maximum performance. + +The interconnect bus is hardware with configurable parameters, which can be +set on a data path according to the requests received from various drivers. +An example of interconnect buses are the interconnects between various +components or functional blocks in chipsets. There can be multiple interconnects +on an SoC that can be multi-tiered. + +Below is a simplified diagram of a real-world SoC interconnect bus topology. + +:: + + +----------------+ +----------------+ + | HW Accelerator |--->| M NoC |<---------------+ + +----------------+ +----------------+ | + | | +------------+ + +-----+ +-------------+ V +------+ | | + | DDR | | +--------+ | PCIe | | | + +-----+ | | Slaves | +------+ | | + ^ ^ | +--------+ | | C NoC | + | | V V | | + +------------------+ +------------------------+ | | +-----+ + | |-->| |-->| |-->| CPU | + | |-->| |<--| | +-----+ + | Mem NoC | | S NoC | +------------+ + | |<--| |---------+ | + | |<--| |<------+ | | +--------+ + +------------------+ +------------------------+ | | +-->| Slaves | + ^ ^ ^ ^ ^ | | +--------+ + | | | | | | V + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | CPUs | | | GPU | | DSP | | Masters |-->| P NoC |-->| Slaves | + +------+ | +-----+ +-----+ +---------+ +----------------+ +--------+ + | + +-------+ + | Modem | + +-------+ + +Terminology +----------- + +Interconnect provider is the software definition of the interconnect hardware. +The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC +and Mem NoC. + +Interconnect node is the software definition of the interconnect hardware +port. Each interconnect provider consists of multiple interconnect nodes, +which are connected to other SoC components including other interconnect +providers. The point on the diagram where the CPUs connect to the memory is +called an interconnect node, which belongs to the Mem NoC interconnect provider. + +Interconnect endpoints are the first or the last element of the path. Every +endpoint is a node, but not every node is an endpoint. + +Interconnect path is everything between two endpoints including all the nodes +that have to be traversed to reach from a source to destination node. It may +include multiple master-slave pairs across several interconnect providers. + +Interconnect consumers are the entities which make use of the data paths exposed +by the providers. The consumers send requests to providers requesting various +throughput, latency and priority. Usually the consumers are device drivers, that +send request based on their needs. An example for a consumer is a video decoder +that supports various formats and image sizes. + +Interconnect providers +---------------------- + +Interconnect provider is an entity that implements methods to initialize and +configure interconnect bus hardware. The interconnect provider drivers should +be registered with the interconnect provider core. + +.. kernel-doc:: include/linux/interconnect-provider.h + +Interconnect consumers +---------------------- + +Interconnect consumers are the clients which use the interconnect APIs to +get paths between endpoints and set their bandwidth/latency/QoS requirements +for these interconnect paths. + +.. kernel-doc:: include/linux/interconnect.h diff --git a/drivers/Kconfig b/drivers/Kconfig index 4f9f99057ff8..45f9decb9848 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -228,4 +228,6 @@ source "drivers/siox/Kconfig" source "drivers/slimbus/Kconfig" +source "drivers/interconnect/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index e1ce029d28fd..bb15b9d0e793 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ obj-$(CONFIG_SIOX) += siox/ obj-$(CONFIG_GNSS) += gnss/ +obj-$(CONFIG_INTERCONNECT) += interconnect/ diff --git a/drivers/interconnect/Kconfig b/drivers/interconnect/Kconfig new file mode 100644 index 000000000000..a261c7d41deb --- /dev/null +++ b/drivers/interconnect/Kconfig @@ -0,0 +1,10 @@ +menuconfig INTERCONNECT + tristate "On-Chip Interconnect management support" + help + Support for management of the on-chip interconnects. + + This framework is designed to provide a generic interface for + managing the interconnects in a SoC. + + If unsure, say no. + diff --git a/drivers/interconnect/Makefile b/drivers/interconnect/Makefile new file mode 100644 index 000000000000..7a01f33b5593 --- /dev/null +++ b/drivers/interconnect/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +icc-core-objs := core.o + +obj-$(CONFIG_INTERCONNECT) += icc-core.o diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c new file mode 100644 index 000000000000..2b937b4f43c4 --- /dev/null +++ b/drivers/interconnect/core.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Interconnect framework core driver + * + * Copyright (c) 2017-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDR(icc_idr); +static LIST_HEAD(icc_providers); +static DEFINE_MUTEX(icc_lock); + +/** + * struct icc_req - constraints that are attached to each node + * @req_node: entry in list of requests for the particular @node + * @node: the interconnect node to which this constraint applies + * @dev: reference to the device that sets the constraints + * @avg_bw: an integer describing the average bandwidth in kBps + * @peak_bw: an integer describing the peak bandwidth in kBps + */ +struct icc_req { + struct hlist_node req_node; + struct icc_node *node; + struct device *dev; + u32 avg_bw; + u32 peak_bw; +}; + +/** + * struct icc_path - interconnect path structure + * @num_nodes: number of hops (nodes) + * @reqs: array of the requests applicable to this path of nodes + */ +struct icc_path { + size_t num_nodes; + struct icc_req reqs[]; +}; + +static struct icc_node *node_find(const int id) +{ + return idr_find(&icc_idr, id); +} + +static struct icc_path *path_init(struct device *dev, struct icc_node *dst, + ssize_t num_nodes) +{ + struct icc_node *node = dst; + struct icc_path *path; + int i; + + path = kzalloc(struct_size(path, reqs, num_nodes), GFP_KERNEL); + if (!path) + return ERR_PTR(-ENOMEM); + + path->num_nodes = num_nodes; + + for (i = num_nodes - 1; i >= 0; i--) { + node->provider->users++; + hlist_add_head(&path->reqs[i].req_node, &node->req_list); + path->reqs[i].node = node; + path->reqs[i].dev = dev; + /* reference to previous node was saved during path traversal */ + node = node->reverse; + } + + return path; +} + +static struct icc_path *path_find(struct device *dev, struct icc_node *src, + struct icc_node *dst) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *n, *node = NULL; + struct list_head traverse_list; + struct list_head edge_list; + struct list_head visited_list; + size_t i, depth = 1; + bool found = false; + + INIT_LIST_HEAD(&traverse_list); + INIT_LIST_HEAD(&edge_list); + INIT_LIST_HEAD(&visited_list); + + list_add(&src->search_list, &traverse_list); + src->reverse = NULL; + + do { + list_for_each_entry_safe(node, n, &traverse_list, search_list) { + if (node == dst) { + found = true; + list_splice_init(&edge_list, &visited_list); + list_splice_init(&traverse_list, &visited_list); + break; + } + for (i = 0; i < node->num_links; i++) { + struct icc_node *tmp = node->links[i]; + + if (!tmp) { + path = ERR_PTR(-ENOENT); + goto out; + } + + if (tmp->is_traversed) + continue; + + tmp->is_traversed = true; + tmp->reverse = node; + list_add_tail(&tmp->search_list, &edge_list); + } + } + + if (found) + break; + + list_splice_init(&traverse_list, &visited_list); + list_splice_init(&edge_list, &traverse_list); + + /* count the hops including the source */ + depth++; + + } while (!list_empty(&traverse_list)); + +out: + + /* reset the traversed state */ + list_for_each_entry_reverse(n, &visited_list, search_list) + n->is_traversed = false; + + if (found) + path = path_init(dev, dst, depth); + + return path; +} + +/* + * We want the path to honor all bandwidth requests, so the average and peak + * bandwidth requirements from each consumer are aggregated at each node. + * The aggregation is platform specific, so each platform can customize it by + * implementing its own aggregate() function. + */ + +static int aggregate_requests(struct icc_node *node) +{ + struct icc_provider *p = node->provider; + struct icc_req *r; + + node->avg_bw = 0; + node->peak_bw = 0; + + hlist_for_each_entry(r, &node->req_list, req_node) + p->aggregate(node, r->avg_bw, r->peak_bw, + &node->avg_bw, &node->peak_bw); + + return 0; +} + +static int apply_constraints(struct icc_path *path) +{ + struct icc_node *next, *prev = NULL; + int ret = -EINVAL; + int i; + + for (i = 0; i < path->num_nodes; i++) { + next = path->reqs[i].node; + + /* + * Both endpoints should be valid master-slave pairs of the + * same interconnect provider that will be configured. + */ + if (!prev || next->provider != prev->provider) { + prev = next; + continue; + } + + /* set the constraints */ + ret = next->provider->set(prev, next); + if (ret) + goto out; + + prev = next; + } +out: + return ret; +} + +/** + * icc_set_bw() - set bandwidth constraints on an interconnect path + * @path: reference to the path returned by icc_get() + * @avg_bw: average bandwidth in kilobytes per second + * @peak_bw: peak bandwidth in kilobytes per second + * + * This function is used by an interconnect consumer to express its own needs + * in terms of bandwidth for a previously requested path between two endpoints. + * The requests are aggregated and each node is updated accordingly. The entire + * path is locked by a mutex to ensure that the set() is completed. + * The @path can be NULL when the "interconnects" DT properties is missing, + * which will mean that no constraints will be set. + * + * Returns 0 on success, or an appropriate error code otherwise. + */ +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path) + return 0; + + mutex_lock(&icc_lock); + + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + + /* update the consumer request for this path */ + path->reqs[i].avg_bw = avg_bw; + path->reqs[i].peak_bw = peak_bw; + + /* aggregate requests for this node */ + aggregate_requests(node); + } + + ret = apply_constraints(path); + if (ret) + pr_debug("interconnect: error applying constraints (%d)\n", + ret); + + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_set_bw); + +/** + * icc_get() - return a handle for path between two endpoints + * @dev: the device requesting the path + * @src_id: source device port id + * @dst_id: destination device port id + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release + * constraints when they are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success, ERR_PTR() on error or NULL if the + * interconnect API is disabled. + */ +struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id) +{ + struct icc_node *src, *dst; + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + + mutex_lock(&icc_lock); + + src = node_find(src_id); + if (!src) + goto out; + + dst = node_find(dst_id); + if (!dst) + goto out; + + path = path_find(dev, src, dst); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + +out: + mutex_unlock(&icc_lock); + return path; +} +EXPORT_SYMBOL_GPL(icc_get); + +/** + * icc_put() - release the reference to the icc_path + * @path: interconnect path + * + * Use this function to release the constraints on a path when the path is + * no longer needed. The constraints will be re-aggregated. + */ +void icc_put(struct icc_path *path) +{ + struct icc_node *node; + size_t i; + int ret; + + if (!path || WARN_ON(IS_ERR(path))) + return; + + ret = icc_set_bw(path, 0, 0); + if (ret) + pr_err("%s: error (%d)\n", __func__, ret); + + mutex_lock(&icc_lock); + for (i = 0; i < path->num_nodes; i++) { + node = path->reqs[i].node; + hlist_del(&path->reqs[i].req_node); + if (!WARN_ON(!node->provider->users)) + node->provider->users--; + } + mutex_unlock(&icc_lock); + + kfree(path); +} +EXPORT_SYMBOL_GPL(icc_put); + +static struct icc_node *icc_node_create_nolock(int id) +{ + struct icc_node *node; + + /* check if node already exists */ + node = node_find(id); + if (node) + return node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + id = idr_alloc(&icc_idr, node, id, id + 1, GFP_KERNEL); + if (id < 0) { + WARN(1, "%s: couldn't get idr\n", __func__); + kfree(node); + return ERR_PTR(id); + } + + node->id = id; + + return node; +} + +/** + * icc_node_create() - create a node + * @id: node id + * + * Return: icc_node pointer on success, or ERR_PTR() on error + */ +struct icc_node *icc_node_create(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = icc_node_create_nolock(id); + + mutex_unlock(&icc_lock); + + return node; +} +EXPORT_SYMBOL_GPL(icc_node_create); + +/** + * icc_node_destroy() - destroy a node + * @id: node id + */ +void icc_node_destroy(int id) +{ + struct icc_node *node; + + mutex_lock(&icc_lock); + + node = node_find(id); + if (node) { + idr_remove(&icc_idr, node->id); + WARN_ON(!hlist_empty(&node->req_list)); + } + + mutex_unlock(&icc_lock); + + kfree(node); +} +EXPORT_SYMBOL_GPL(icc_node_destroy); + +/** + * icc_link_create() - create a link between two nodes + * @node: source node id + * @dst_id: destination node id + * + * Create a link between two nodes. The nodes might belong to different + * interconnect providers and the @dst_id node might not exist (if the + * provider driver has not probed yet). So just create the @dst_id node + * and when the actual provider driver is probed, the rest of the node + * data is filled. + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_create(struct icc_node *node, const int dst_id) +{ + struct icc_node *dst; + struct icc_node **new; + int ret = 0; + + if (!node->provider) + return -EINVAL; + + mutex_lock(&icc_lock); + + dst = node_find(dst_id); + if (!dst) { + dst = icc_node_create_nolock(dst_id); + + if (IS_ERR(dst)) { + ret = PTR_ERR(dst); + goto out; + } + } + + new = krealloc(node->links, + (node->num_links + 1) * sizeof(*node->links), + GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto out; + } + + node->links = new; + node->links[node->num_links++] = dst; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_create); + +/** + * icc_link_destroy() - destroy a link between two nodes + * @src: pointer to source node + * @dst: pointer to destination node + * + * Return: 0 on success, or an error code otherwise + */ +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + struct icc_node **new; + size_t slot; + int ret = 0; + + if (IS_ERR_OR_NULL(src)) + return -EINVAL; + + if (IS_ERR_OR_NULL(dst)) + return -EINVAL; + + mutex_lock(&icc_lock); + + for (slot = 0; slot < src->num_links; slot++) + if (src->links[slot] == dst) + break; + + if (WARN_ON(slot == src->num_links)) { + ret = -ENXIO; + goto out; + } + + src->links[slot] = src->links[--src->num_links]; + + new = krealloc(src->links, src->num_links * sizeof(*src->links), + GFP_KERNEL); + if (new) + src->links = new; + +out: + mutex_unlock(&icc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(icc_link_destroy); + +/** + * icc_node_add() - add interconnect node to interconnect provider + * @node: pointer to the interconnect node + * @provider: pointer to the interconnect provider + */ +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + + node->provider = provider; + list_add_tail(&node->node_list, &provider->nodes); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_add); + +/** + * icc_node_del() - delete interconnect node from interconnect provider + * @node: pointer to the interconnect node + */ +void icc_node_del(struct icc_node *node) +{ + mutex_lock(&icc_lock); + + list_del(&node->node_list); + + mutex_unlock(&icc_lock); +} +EXPORT_SYMBOL_GPL(icc_node_del); + +/** + * icc_provider_add() - add a new interconnect provider + * @provider: the interconnect provider that will be added into topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_add(struct icc_provider *provider) +{ + if (WARN_ON(!provider->set)) + return -EINVAL; + + mutex_lock(&icc_lock); + + INIT_LIST_HEAD(&provider->nodes); + list_add_tail(&provider->provider_list, &icc_providers); + + mutex_unlock(&icc_lock); + + dev_dbg(provider->dev, "interconnect provider added to topology\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_add); + +/** + * icc_provider_del() - delete previously added interconnect provider + * @provider: the interconnect provider that will be removed from topology + * + * Return: 0 on success, or an error code otherwise + */ +int icc_provider_del(struct icc_provider *provider) +{ + mutex_lock(&icc_lock); + if (provider->users) { + pr_warn("interconnect provider still has %d users\n", + provider->users); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + if (!list_empty(&provider->nodes)) { + pr_warn("interconnect provider still has nodes\n"); + mutex_unlock(&icc_lock); + return -EBUSY; + } + + list_del(&provider->provider_list); + mutex_unlock(&icc_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(icc_provider_del); + +MODULE_AUTHOR("Georgi Djakov "); +MODULE_DESCRIPTION("Interconnect Driver Core"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h new file mode 100644 index 000000000000..78208a754181 --- /dev/null +++ b/include/linux/interconnect-provider.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_PROVIDER_H +#define __LINUX_INTERCONNECT_PROVIDER_H + +#include + +#define icc_units_to_bps(bw) ((bw) * 1000ULL) + +struct icc_node; + +/** + * struct icc_provider - interconnect provider (controller) entity that might + * provide multiple interconnect controls + * + * @provider_list: list of the registered interconnect providers + * @nodes: internal list of the interconnect provider nodes + * @set: pointer to device specific set operation function + * @aggregate: pointer to device specific aggregate operation function + * @dev: the device this interconnect provider belongs to + * @users: count of active users + * @data: pointer to private data + */ +struct icc_provider { + struct list_head provider_list; + struct list_head nodes; + int (*set)(struct icc_node *src, struct icc_node *dst); + int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, + u32 *agg_avg, u32 *agg_peak); + struct device *dev; + int users; + void *data; +}; + +/** + * struct icc_node - entity that is part of the interconnect topology + * + * @id: platform specific node id + * @name: node name used in debugfs + * @links: a list of targets pointing to where we can go next when traversing + * @num_links: number of links to other interconnect nodes + * @provider: points to the interconnect provider of this node + * @node_list: the list entry in the parent provider's "nodes" list + * @search_list: list used when walking the nodes graph + * @reverse: pointer to previous node when walking the nodes graph + * @is_traversed: flag that is used when walking the nodes graph + * @req_list: a list of QoS constraint requests associated with this node + * @avg_bw: aggregated value of average bandwidth requests from all consumers + * @peak_bw: aggregated value of peak bandwidth requests from all consumers + * @data: pointer to private data + */ +struct icc_node { + int id; + const char *name; + struct icc_node **links; + size_t num_links; + + struct icc_provider *provider; + struct list_head node_list; + struct list_head search_list; + struct icc_node *reverse; + u8 is_traversed:1; + struct hlist_head req_list; + u32 avg_bw; + u32 peak_bw; + void *data; +}; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_node *icc_node_create(int id); +void icc_node_destroy(int id); +int icc_link_create(struct icc_node *node, const int dst_id); +int icc_link_destroy(struct icc_node *src, struct icc_node *dst); +void icc_node_add(struct icc_node *node, struct icc_provider *provider); +void icc_node_del(struct icc_node *node); +int icc_provider_add(struct icc_provider *provider); +int icc_provider_del(struct icc_provider *provider); + +#else + +static inline struct icc_node *icc_node_create(int id) +{ + return ERR_PTR(-ENOTSUPP); +} + +void icc_node_destroy(int id) +{ +} + +static inline int icc_link_create(struct icc_node *node, const int dst_id) +{ + return -ENOTSUPP; +} + +int icc_link_destroy(struct icc_node *src, struct icc_node *dst) +{ + return -ENOTSUPP; +} + +void icc_node_add(struct icc_node *node, struct icc_provider *provider) +{ +} + +void icc_node_del(struct icc_node *node) +{ +} + +static inline int icc_provider_add(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +static inline int icc_provider_del(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_PROVIDER_H */ diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h new file mode 100644 index 000000000000..c331afb3a2c8 --- /dev/null +++ b/include/linux/interconnect.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2019, Linaro Ltd. + * Author: Georgi Djakov + */ + +#ifndef __LINUX_INTERCONNECT_H +#define __LINUX_INTERCONNECT_H + +#include +#include + +/* macros for converting to icc units */ +#define Bps_to_icc(x) ((x) / 1000) +#define kBps_to_icc(x) (x) +#define MBps_to_icc(x) ((x) * 1000) +#define GBps_to_icc(x) ((x) * 1000 * 1000) +#define bps_to_icc(x) (1) +#define kbps_to_icc(x) ((x) / 8 + ((x) % 8 ? 1 : 0)) +#define Mbps_to_icc(x) ((x) * 1000 / 8) +#define Gbps_to_icc(x) ((x) * 1000 * 1000 / 8) + +struct icc_path; +struct device; + +#if IS_ENABLED(CONFIG_INTERCONNECT) + +struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id); +void icc_put(struct icc_path *path); +int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); + +#else + +static inline struct icc_path *icc_get(struct device *dev, const int src_id, + const int dst_id) +{ + return NULL; +} + +static inline void icc_put(struct icc_path *path) +{ +} + +static inline int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) +{ + return 0; +} + +#endif /* CONFIG_INTERCONNECT */ + +#endif /* __LINUX_INTERCONNECT_H */ -- cgit v1.2.3 From 87e3031b6fbd83ea83adf1bf9602bcce313ee787 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 16 Jan 2019 18:10:58 +0200 Subject: interconnect: Allow endpoints translation via DT Currently we support only platform data for specifying the interconnect endpoints. As now the endpoints are hard-coded into the consumer driver this may lead to complications when a single driver is used by multiple SoCs, which may have different interconnect topology. To avoid cluttering the consumer drivers, introduce a translation function to help us get the board specific interconnect data from device-tree. Reviewed-by: Evan Green Signed-off-by: Georgi Djakov Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 149 ++++++++++++++++++++++++++++++++++ include/linux/interconnect-provider.h | 17 ++++ include/linux/interconnect.h | 7 ++ 3 files changed, 173 insertions(+) (limited to 'include/linux') diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 2b937b4f43c4..a8c2bd35197f 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static DEFINE_IDR(icc_idr); @@ -194,6 +195,152 @@ out: return ret; } +/* of_icc_xlate_onecell() - Translate function using a single index. + * @spec: OF phandle args to map into an interconnect node. + * @data: private data (pointer to struct icc_onecell_data) + * + * This is a generic translate function that can be used to model simple + * interconnect providers that have one device tree node and provide + * multiple interconnect nodes. A single cell is used as an index into + * an array of icc nodes specified in the icc_onecell_data struct when + * registering the provider. + */ +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data) +{ + struct icc_onecell_data *icc_data = data; + unsigned int idx = spec->args[0]; + + if (idx >= icc_data->num_nodes) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return icc_data->nodes[idx]; +} +EXPORT_SYMBOL_GPL(of_icc_xlate_onecell); + +/** + * of_icc_get_from_provider() - Look-up interconnect node + * @spec: OF phandle args to use for look-up + * + * Looks for interconnect provider under the node specified by @spec and if + * found, uses xlate function of the provider to map phandle args to node. + * + * Returns a valid pointer to struct icc_node on success or ERR_PTR() + * on failure. + */ +static struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec) +{ + struct icc_node *node = ERR_PTR(-EPROBE_DEFER); + struct icc_provider *provider; + + if (!spec || spec->args_count != 1) + return ERR_PTR(-EINVAL); + + mutex_lock(&icc_lock); + list_for_each_entry(provider, &icc_providers, provider_list) { + if (provider->dev->of_node == spec->np) + node = provider->xlate(spec, provider->data); + if (!IS_ERR(node)) + break; + } + mutex_unlock(&icc_lock); + + return node; +} + +/** + * of_icc_get() - get a path handle from a DT node based on name + * @dev: device pointer for the consumer device + * @name: interconnect path name + * + * This function will search for a path between two endpoints and return an + * icc_path handle on success. Use icc_put() to release constraints when they + * are not needed anymore. + * If the interconnect API is disabled, NULL is returned and the consumer + * drivers will still build. Drivers are free to handle this specifically, + * but they don't have to. + * + * Return: icc_path pointer on success or ERR_PTR() on error. NULL is returned + * when the API is disabled or the "interconnects" DT property is missing. + */ +struct icc_path *of_icc_get(struct device *dev, const char *name) +{ + struct icc_path *path = ERR_PTR(-EPROBE_DEFER); + struct icc_node *src_node, *dst_node; + struct device_node *np = NULL; + struct of_phandle_args src_args, dst_args; + int idx = 0; + int ret; + + if (!dev || !dev->of_node) + return ERR_PTR(-ENODEV); + + np = dev->of_node; + + /* + * When the consumer DT node do not have "interconnects" property + * return a NULL path to skip setting constraints. + */ + if (!of_find_property(np, "interconnects", NULL)) + return NULL; + + /* + * We use a combination of phandle and specifier for endpoint. For now + * lets support only global ids and extend this in the future if needed + * without breaking DT compatibility. + */ + if (name) { + idx = of_property_match_string(np, "interconnect-names", name); + if (idx < 0) + return ERR_PTR(idx); + } + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2, + &src_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(src_args.np); + + ret = of_parse_phandle_with_args(np, "interconnects", + "#interconnect-cells", idx * 2 + 1, + &dst_args); + if (ret) + return ERR_PTR(ret); + + of_node_put(dst_args.np); + + src_node = of_icc_get_from_provider(&src_args); + + if (IS_ERR(src_node)) { + if (PTR_ERR(src_node) != -EPROBE_DEFER) + dev_err(dev, "error finding src node: %ld\n", + PTR_ERR(src_node)); + return ERR_CAST(src_node); + } + + dst_node = of_icc_get_from_provider(&dst_args); + + if (IS_ERR(dst_node)) { + if (PTR_ERR(dst_node) != -EPROBE_DEFER) + dev_err(dev, "error finding dst node: %ld\n", + PTR_ERR(dst_node)); + return ERR_CAST(dst_node); + } + + mutex_lock(&icc_lock); + path = path_find(dev, src_node, dst_node); + if (IS_ERR(path)) + dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path)); + mutex_unlock(&icc_lock); + + return path; +} +EXPORT_SYMBOL_GPL(of_icc_get); + /** * icc_set_bw() - set bandwidth constraints on an interconnect path * @path: reference to the path returned by icc_get() @@ -519,6 +666,8 @@ int icc_provider_add(struct icc_provider *provider) { if (WARN_ON(!provider->set)) return -EINVAL; + if (WARN_ON(!provider->xlate)) + return -EINVAL; mutex_lock(&icc_lock); diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 78208a754181..63caccadc2db 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -12,6 +12,21 @@ #define icc_units_to_bps(bw) ((bw) * 1000ULL) struct icc_node; +struct of_phandle_args; + +/** + * struct icc_onecell_data - driver data for onecell interconnect providers + * + * @num_nodes: number of nodes in this device + * @nodes: array of pointers to the nodes in this device + */ +struct icc_onecell_data { + unsigned int num_nodes; + struct icc_node *nodes[]; +}; + +struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, + void *data); /** * struct icc_provider - interconnect provider (controller) entity that might @@ -21,6 +36,7 @@ struct icc_node; * @nodes: internal list of the interconnect provider nodes * @set: pointer to device specific set operation function * @aggregate: pointer to device specific aggregate operation function + * @xlate: provider-specific callback for mapping nodes from phandle arguments * @dev: the device this interconnect provider belongs to * @users: count of active users * @data: pointer to private data @@ -31,6 +47,7 @@ struct icc_provider { int (*set)(struct icc_node *src, struct icc_node *dst); int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); + struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); struct device *dev; int users; void *data; diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index c331afb3a2c8..dc25864755ba 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -27,6 +27,7 @@ struct device; struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id); +struct icc_path *of_icc_get(struct device *dev, const char *name); void icc_put(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); @@ -38,6 +39,12 @@ static inline struct icc_path *icc_get(struct device *dev, const int src_id, return NULL; } +static inline struct icc_path *of_icc_get(struct device *dev, + const char *name) +{ + return NULL; +} + static inline void icc_put(struct icc_path *path) { } -- cgit v1.2.3 From c81d64d3dc1f2decf8f3a9354416b7496b5c389b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 16 Jan 2019 11:25:21 -0700 Subject: io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros This patch adds generic io{read|write}64[be]{_lo_hi|_hi_lo} macros if they are not already defined by the architecture. (As they are provided by the generic iomap library). The patch also points io{read|write}64[be] to the variant specified by the header name. This is because new drivers are encouraged to use ioreadXX, et al instead of readX[1], et al -- and mixing ioreadXX with readq is pretty ugly. [1] LDD3: section 9.4.2 Signed-off-by: Logan Gunthorpe Reviewed-by: Andy Shevchenko Cc: Christoph Hellwig Cc: Arnd Bergmann Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/io-64-nonatomic-hi-lo.h | 64 +++++++++++++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 64 +++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index 862d786a904f..ae21b72cce85 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -55,4 +55,68 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed hi_lo_writeq_relaxed #endif +#ifndef ioread64_hi_lo +#define ioread64_hi_lo ioread64_hi_lo +static inline u64 ioread64_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32(addr + sizeof(u32)); + low = ioread32(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_hi_lo +#define iowrite64_hi_lo iowrite64_hi_lo +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32(val >> 32, addr + sizeof(u32)); + iowrite32(val, addr); +} +#endif + +#ifndef ioread64be_hi_lo +#define ioread64be_hi_lo ioread64be_hi_lo +static inline u64 ioread64be_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32be(addr); + low = ioread32be(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_hi_lo +#define iowrite64be_hi_lo iowrite64be_hi_lo +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32be(val >> 32, addr); + iowrite32be(val, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_hi_lo +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_hi_lo +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_hi_lo +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_hi_lo +#endif + #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index d042e7bb5adb..faaa842dbdb9 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -55,4 +55,68 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed lo_hi_writeq_relaxed #endif +#ifndef ioread64_lo_hi +#define ioread64_lo_hi ioread64_lo_hi +static inline u64 ioread64_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_lo_hi +#define iowrite64_lo_hi iowrite64_lo_hi +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64be_lo_hi +#define ioread64be_lo_hi ioread64be_lo_hi +static inline u64 ioread64be_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32be(addr + sizeof(u32)); + high = ioread32be(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_lo_hi +#define iowrite64be_lo_hi iowrite64be_lo_hi +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32be(val, addr + sizeof(u32)); + iowrite32be(val >> 32, addr); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_lo_hi +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_lo_hi +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_lo_hi +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_lo_hi +#endif + #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.2.3 From 51c48b310183ab6ba5419edfc6a8de889cc04521 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 19 Jan 2019 11:35:04 -0600 Subject: PCI: Probe bridge window attributes once at enumeration-time pci_bridge_check_ranges() determines whether a bridge supports the optional I/O and prefetchable memory windows and sets the flag bits in the bridge resources. This *could* be done once during enumeration except that the resource allocation code completely clears the flag bits, e.g., in the pci_assign_unassigned_bridge_resources() path. The problem with pci_bridge_check_ranges() in the resource allocation path is that we may allocate resources after devices have been claimed by drivers, and pci_bridge_check_ranges() *changes* the window registers to determine whether they're writable. This may break concurrent accesses to devices behind the bridge. Add a new pci_read_bridge_windows() to determine whether a bridge supports the optional windows, call it once during enumeration, remember the results, and change pci_bridge_check_ranges() so it doesn't touch the bridge windows but sets the flag bits based on those remembered results. Link: https://lore.kernel.org/linux-pci/1506151482-113560-1-git-send-email-wangzhou1@hisilicon.com Link: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg02082.html Reported-by: Yandong Xu Tested-by: Yandong Xu Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Sagi Grimberg Cc: Ofer Hayut Cc: Roy Shterman Cc: Keith Busch Cc: Zhou Wang --- drivers/pci/probe.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/setup-bus.c | 45 ++++-------------------------------------- include/linux/pci.h | 3 +++ 3 files changed, 59 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 257b9f6f2ebb..2ef8b954c65a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -348,6 +348,57 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) } } +static void pci_read_bridge_windows(struct pci_dev *bridge) +{ + u16 io; + u32 pmem, tmp; + + pci_read_config_word(bridge, PCI_IO_BASE, &io); + if (!io) { + pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); + pci_read_config_word(bridge, PCI_IO_BASE, &io); + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) + bridge->io_window = 1; + + /* + * DECchip 21050 pass 2 errata: the bridge may miss an address + * disconnect boundary by one PCI data phase. Workaround: do not + * use prefetching on this device. + */ + if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) + return; + + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + if (!pmem) { + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, + 0xffe0fff0); + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); + } + if (!pmem) + return; + + bridge->pref_window = 1; + + if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { + + /* + * Bridge claims to have a 64-bit prefetchable memory + * window; verify that the upper bits are actually + * writable. + */ + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + 0xffffffff); + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem); + if (tmp) + bridge->pref_64_window = 1; + } +} + static void pci_read_bridge_io(struct pci_bus *child) { struct pci_dev *dev = child->self; @@ -1739,6 +1790,7 @@ int pci_setup_device(struct pci_dev *dev) pci_read_irq(dev); dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); + pci_read_bridge_windows(dev); set_pcie_hotplug_bridge(dev); pos = pci_find_capability(dev, PCI_CAP_ID_SSVID); if (pos) { diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ed960436df5e..1941bb0a6c13 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -735,58 +735,21 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i) base/limit registers must be read-only and read as 0. */ static void pci_bridge_check_ranges(struct pci_bus *bus) { - u16 io; - u32 pmem; struct pci_dev *bridge = bus->self; - struct resource *b_res; + struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; - b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; b_res[1].flags |= IORESOURCE_MEM; - pci_read_config_word(bridge, PCI_IO_BASE, &io); - if (!io) { - pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); - pci_read_config_word(bridge, PCI_IO_BASE, &io); - pci_write_config_word(bridge, PCI_IO_BASE, 0x0); - } - if (io) + if (bridge->io_window) b_res[0].flags |= IORESOURCE_IO; - /* DECchip 21050 pass 2 errata: the bridge may miss an address - disconnect boundary by one PCI data phase. - Workaround: do not use prefetching on this device. */ - if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) - return; - - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - if (!pmem) { - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, - 0xffe0fff0); - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); - } - if (pmem) { + if (bridge->pref_window) { b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; - if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == - PCI_PREF_RANGE_TYPE_64) { + if (bridge->pref_64_window) { b_res[2].flags |= IORESOURCE_MEM_64; b_res[2].flags |= PCI_PREF_RANGE_TYPE_64; } } - - /* double check if bridge does support 64 bit pref */ - if (b_res[2].flags & IORESOURCE_MEM_64) { - u32 mem_base_hi, tmp; - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, - &mem_base_hi); - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - 0xffffffff); - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); - if (!tmp) - b_res[2].flags &= ~IORESOURCE_MEM_64; - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - mem_base_hi); - } } /* Helper function for sizing routines: find first available diff --git a/include/linux/pci.h b/include/linux/pci.h index 65f1d8c2f082..40b327b814aa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -373,6 +373,9 @@ struct pci_dev { bool match_driver; /* Skip attaching driver */ unsigned int transparent:1; /* Subtractive decode bridge */ + unsigned int io_window:1; /* Bridge has I/O window */ + unsigned int pref_window:1; /* Bridge has pref mem window */ + unsigned int pref_64_window:1; /* Pref mem window is 64-bit */ unsigned int multifunction:1; /* Multi-function device */ unsigned int is_busmaster:1; /* Is busmaster */ -- cgit v1.2.3 From 856c395cfa63b94a1d8215182f0243c222f6f927 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jan 2019 23:27:11 -0800 Subject: net: introduce a knob to control whether to inherit devconf config There have been many people complaining about the inconsistent behaviors of IPv4 and IPv6 devconf when creating new network namespaces. Currently, for IPv4, we inherit all current settings from init_net, but for IPv6 we reset all setting to default. This patch introduces a new /proc file /proc/sys/net/core/devconf_inherit_init_net to control the behavior of whether to inhert sysctl current settings from init_net. This file itself is only available in init_net. As demonstrated below: Initial setup in init_net: # cat /proc/sys/net/ipv4/conf/all/rp_filter 2 # cat /proc/sys/net/ipv6/conf/all/accept_dad 1 Default value 0 (current behavior): # ip netns del test # ip netns add test # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter 2 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad 0 Set to 1 (inherit from init_net): # echo 1 > /proc/sys/net/core/devconf_inherit_init_net # ip netns del test # ip netns add test # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter 2 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad 1 Set to 2 (reset to default): # echo 2 > /proc/sys/net/core/devconf_inherit_init_net # ip netns del test # ip netns add test # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter 0 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad 0 Set to a value out of range (invalid): # echo 3 > /proc/sys/net/core/devconf_inherit_init_net -bash: echo: write error: Invalid argument # echo -1 > /proc/sys/net/core/devconf_inherit_init_net -bash: echo: write error: Invalid argument Reported-by: Zhu Yanjun Reported-by: Tonghao Zhang Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Acked-by: Tonghao Zhang Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 14 ++++++++++++++ include/linux/netdevice.h | 1 + net/core/sysctl_net_core.c | 18 ++++++++++++++++++ net/ipv4/devinet.c | 43 ++++++++++++++++++++----------------------- net/ipv6/addrconf.c | 5 +++++ 5 files changed, 58 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 2793d4eac55f..bc0680706870 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -291,6 +291,20 @@ user space is responsible for creating them if needed. Default : 0 (for compatibility reasons) +devconf_inherit_init_net +---------------------------- + +Controls if a new network namespace should inherit all current +settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By +default, we keep the current behavior: for IPv4 we inherit all current +settings from init_net and for IPv6 we reset all settings to default. + +If set to 1, both IPv4 and IPv6 settings are forced to inherit from +current ones in init_net. If set to 2, both IPv4 and IPv6 settings are +forced to reset to their default values. + +Default : 0 (for compatibility reasons) + 2. /proc/sys/net/unix - Parameters for Unix domain sockets ------------------------------------------------------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a57b9a853aab..e675ef97a426 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -630,6 +630,7 @@ struct netdev_queue { } ____cacheline_aligned_in_smp; extern int sysctl_fb_tunnels_only_for_init_net; +extern int sysctl_devconf_inherit_init_net; static inline bool net_has_fallback_tunnels(const struct net *net) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d67ec17f2cc8..84bf2861f45f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -36,6 +36,15 @@ static int net_msg_warn; /* Unused, but still a sysctl */ int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); +/* 0 - Keep current behavior: + * IPv4: inherit all current settings from init_net + * IPv6: reset all settings to default + * 1 - Both inherit all current settings from init_net + * 2 - Both reset all settings to default + */ +int sysctl_devconf_inherit_init_net __read_mostly; +EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -544,6 +553,15 @@ static struct ctl_table net_core_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "devconf_inherit_init_net", + .data = &sysctl_devconf_inherit_init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, { } }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cd027639df2f..cd9033245b98 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2591,32 +2591,32 @@ static __net_init int devinet_init_net(struct net *net) int err; struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL - struct ctl_table *tbl = ctl_forward_entry; + struct ctl_table *tbl; struct ctl_table_header *forw_hdr; #endif err = -ENOMEM; - all = &ipv4_devconf; - dflt = &ipv4_devconf_dflt; - - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); - if (!all) - goto err_alloc_all; + all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); + if (!all) + goto err_alloc_all; - dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); - if (!dflt) - goto err_alloc_dflt; + dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); + if (!dflt) + goto err_alloc_dflt; #ifdef CONFIG_SYSCTL - tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); - if (!tbl) - goto err_alloc_ctl; + tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL); + if (!tbl) + goto err_alloc_ctl; - tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; - tbl[0].extra1 = all; - tbl[0].extra2 = net; + tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; + tbl[0].extra1 = all; + tbl[0].extra2 = net; #endif + + if (sysctl_devconf_inherit_init_net != 2 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); + memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); } #ifdef CONFIG_SYSCTL @@ -2646,15 +2646,12 @@ err_reg_ctl: err_reg_dflt: __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: - if (tbl != ctl_forward_entry) - kfree(tbl); + kfree(tbl); err_alloc_ctl: #endif - if (dflt != &ipv4_devconf_dflt) - kfree(dflt); + kfree(dflt); err_alloc_dflt: - if (all != &ipv4_devconf) - kfree(all); + kfree(all); err_alloc_all: return err; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 57198b3c86da..48cd36311901 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6902,6 +6902,11 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; + if (sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + } + /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; -- cgit v1.2.3 From 5b93ac542301026eff8954589cf59f801d03db3e Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Thu, 10 Jan 2019 09:32:02 +0530 Subject: OPP: Add support for parsing the 'opp-level' property Now that the OPP bindings are updated to include an optional 'opp-level' property, add support to parse it from device tree and store it as part of dev_pm_opp structure. Also add and export an helper 'dev_pm_opp_get_level()' that can be used to get the level value read from device tree when present. Reviewed-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rajendra Nayak Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/opp/core.c | 18 ++++++++++++++++++ drivers/opp/of.c | 2 ++ drivers/opp/opp.h | 2 ++ include/linux/pm_opp.h | 7 +++++++ 4 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e5507add8f04..90b78a122be9 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -130,6 +130,24 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); +/** + * dev_pm_opp_get_level() - Gets the level corresponding to an available opp + * @opp: opp for which level value has to be returned for + * + * Return: level read from device tree corresponding to the opp, else + * return 0. + */ +unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) +{ + if (IS_ERR_OR_NULL(opp) || !opp->available) { + pr_err("%s: Invalid parameters\n", __func__); + return 0; + } + + return opp->level; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); + /** * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not * @opp: opp for which turbo mode is being verified diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 06f0f632ec47..1779f2c93291 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -594,6 +594,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, new_opp->rate = (unsigned long)rate; } + of_property_read_u32(np, "opp-level", &new_opp->level); + /* Check if the OPP supports hardware's hierarchy of versions or not */ if (!_opp_is_supported(dev, opp_table, np)) { dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index e24d81497375..4458175aa661 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -60,6 +60,7 @@ extern struct list_head opp_tables; * @suspend: true if suspend OPP * @pstate: Device's power domain's performance state. * @rate: Frequency in hertz + * @level: Performance level * @supplies: Power supplies voltage/current values * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's * frequency from any other OPP's frequency. @@ -80,6 +81,7 @@ struct dev_pm_opp { bool suspend; unsigned int pstate; unsigned long rate; + unsigned int level; struct dev_pm_opp_supply *supplies; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0a2a88e5a383..473d2c7516f0 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -86,6 +86,8 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); + bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); @@ -157,6 +159,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return 0; } +static inline unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) +{ + return 0; +} + static inline bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp) { return false; -- cgit v1.2.3 From ba5ea614622dca6d675b4cc8a97270569ae13a23 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Mon, 21 Jan 2019 07:26:25 +0100 Subject: bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch refactors ip_mc_check_igmp(), ipv6_mc_check_mld() and their callers (more precisely, the Linux bridge) to not rely on the skb_trimmed parameter anymore. An skb with its tail trimmed to the IP packet length was initially introduced for the following three reasons: 1) To be able to verify the ICMPv6 checksum. 2) To be able to distinguish the version of an IGMP or MLD query. They are distinguishable only by their size. 3) To avoid parsing data for an IGMPv3 or MLDv2 report that is beyond the IP packet but still within the skb. The first case still uses a cloned and potentially trimmed skb to verfiy. However, there is no need to propagate it to the caller. For the second and third case explicit IP packet length checks were added. This hopefully makes ip_mc_check_igmp() and ipv6_mc_check_mld() easier to read and verfiy, as well as easier to use. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/igmp.h | 11 ++++++++- include/linux/ip.h | 5 ++++ include/linux/ipv6.h | 6 +++++ include/net/addrconf.h | 12 +++++++++- net/batman-adv/multicast.c | 4 ++-- net/bridge/br_multicast.c | 57 +++++++++++++++++++++++----------------------- net/ipv4/igmp.c | 23 ++++--------------- net/ipv6/mcast_snoop.c | 24 ++++--------------- 8 files changed, 70 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 119f53941c12..8b4348f69bc5 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,14 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) +static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (skb_transport_offset(skb) + ip_transport_len(skb) < len) + return -EINVAL; + + return pskb_may_pull(skb, len); +} + extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); @@ -130,6 +139,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); -int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); +int ip_mc_check_igmp(struct sk_buff *skb); #endif diff --git a/include/linux/ip.h b/include/linux/ip.h index 492bc6513533..482b7b7c9f30 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -34,4 +34,9 @@ static inline struct iphdr *ipip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_transport_header(skb); } + +static inline unsigned int ip_transport_len(const struct sk_buff *skb) +{ + return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb); +} #endif /* _LINUX_IP_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 495e834c1367..6d45ce784bea 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -104,6 +104,12 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) return (struct ipv6hdr *)skb_transport_header(skb); } +static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) +{ + return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) - + skb_network_header_len(skb); +} + /* This structure contains results of exthdrs parsing as offsets from skb->nh. diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 1656c5978498..daf11dcb0f70 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -49,6 +49,7 @@ struct prefix_info { struct in6_addr prefix; }; +#include #include #include #include @@ -201,6 +202,15 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ +static inline int ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) +{ + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) + return -EINVAL; + + return pskb_may_pull(skb, len); +} + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, @@ -219,7 +229,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); +int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 69244e4598f5..1dd70f048e7b 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -674,7 +674,7 @@ static void batadv_mcast_mla_update(struct work_struct *work) */ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) { - if (ip_mc_check_igmp(skb, NULL) < 0) + if (ip_mc_check_igmp(skb) < 0) return false; switch (igmp_hdr(skb)->type) { @@ -741,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, */ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) { - if (ipv6_mc_check_mld(skb, NULL) < 0) + if (ipv6_mc_check_mld(skb) < 0) return false; switch (icmp6_hdr(skb)->icmp6_type) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3aeff0895669..156c4905639e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -938,7 +938,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, for (i = 0; i < num; i++) { len += sizeof(*grec); - if (!pskb_may_pull(skb, len)) + if (!ip_mc_may_pull(skb, len)) return -EINVAL; grec = (void *)(skb->data + len - sizeof(*grec)); @@ -946,7 +946,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, type = grec->grec_type; len += ntohs(grec->grec_nsrcs) * 4; - if (!pskb_may_pull(skb, len)) + if (!ip_mc_may_pull(skb, len)) return -EINVAL; /* We treat this as an IGMPv2 report for now. */ @@ -985,15 +985,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; struct mld2_grec *grec; + unsigned int grec_len; int i; int len; int num; int err = 0; - if (!pskb_may_pull(skb, sizeof(*icmp6h))) + if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; icmp6h = icmp6_hdr(skb); @@ -1003,21 +1005,25 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; - nsrcs = skb_header_pointer(skb, - len + offsetof(struct mld2_grec, - grec_nsrcs), + nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); + + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < + nsrcs_offset + sizeof(_nsrcs)) + return -EINVAL; + + nsrcs = skb_header_pointer(skb, nsrcs_offset, sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; - if (!pskb_may_pull(skb, - len + sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs))) + grec_len = sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); + + if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); - len += sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + len += grec_len; /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { @@ -1219,6 +1225,7 @@ static void br_ip4_multicast_query(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int transport_len = ip_transport_len(skb); const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); struct net_bridge_mdb_entry *mp; @@ -1228,7 +1235,6 @@ static void br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; - unsigned int offset = skb_transport_offset(skb); __be32 group; spin_lock(&br->multicast_lock); @@ -1238,14 +1244,14 @@ static void br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == offset + sizeof(*ih)) { + if (transport_len == sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= offset + sizeof(*ih3)) { + } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1296,6 +1302,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int transport_len = ipv6_transport_len(skb); const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; struct net_bridge_mdb_entry *mp; @@ -1315,7 +1322,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == offset + sizeof(*mld)) { + if (transport_len == sizeof(*mld)) { if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; @@ -1581,12 +1588,11 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb_trimmed = NULL; const unsigned char *src; struct igmphdr *ih; int err; - err = ip_mc_check_igmp(skb, &skb_trimmed); + err = ip_mc_check_igmp(skb); if (err == -ENOMSG) { if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { @@ -1612,19 +1618,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid, src); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - br_ip4_multicast_query(br, port, skb_trimmed, vid); + br_ip4_multicast_query(br, port, skb, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid, src); break; } - if (skb_trimmed && skb_trimmed != skb) - kfree_skb(skb_trimmed); - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); @@ -1637,12 +1640,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb_trimmed = NULL; const unsigned char *src; struct mld_msg *mld; int err; - err = ipv6_mc_check_mld(skb, &skb_trimmed); + err = ipv6_mc_check_mld(skb); if (err == -ENOMSG) { if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) @@ -1664,10 +1666,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, src); break; case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); + err = br_ip6_multicast_mld2_report(br, port, skb, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb_trimmed, vid); + err = br_ip6_multicast_query(br, port, skb, vid); break; case ICMPV6_MGM_REDUCTION: src = eth_hdr(skb)->h_source; @@ -1675,9 +1677,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } - if (skb_trimmed && skb_trimmed != skb) - kfree_skb(skb_trimmed); - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 765b2b32c4a4..b1f6d93282d7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1544,7 +1544,7 @@ static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_simple_validate(skb); } -static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +static int __ip_mc_check_igmp(struct sk_buff *skb) { struct sk_buff *skb_chk; @@ -1566,16 +1566,10 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ret) goto err; - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) - kfree_skb(skb_chk); - ret = 0; err: - if (ret && skb_chk && skb_chk != skb) + if (skb_chk && skb_chk != skb) kfree_skb(skb_chk); return ret; @@ -1584,7 +1578,6 @@ err: /** * ip_mc_check_igmp - checks whether this is a sane IGMP packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) * * Checks whether an IPv4 packet is a valid IGMP packet. If so sets * skb transport header accordingly and returns zero. @@ -1594,18 +1587,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an IGMP packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ip_mc_check_igmp(struct sk_buff *skb) { int ret = ip_mc_check_iphdr(skb); @@ -1615,7 +1600,7 @@ int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ip_hdr(skb)->protocol != IPPROTO_IGMP) return -ENOMSG; - return __ip_mc_check_igmp(skb, skb_trimmed); + return __ip_mc_check_igmp(skb); } EXPORT_SYMBOL(ip_mc_check_igmp); diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index 9405b04eecc6..1a917dc80d5e 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -136,8 +136,7 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } -static int __ipv6_mc_check_mld(struct sk_buff *skb, - struct sk_buff **skb_trimmed) +static int __ipv6_mc_check_mld(struct sk_buff *skb) { struct sk_buff *skb_chk = NULL; @@ -160,16 +159,10 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, if (ret) goto err; - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) - kfree_skb(skb_chk); - ret = 0; err: - if (ret && skb_chk && skb_chk != skb) + if (skb_chk && skb_chk != skb) kfree_skb(skb_chk); return ret; @@ -178,7 +171,6 @@ err: /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets * skb transport header accordingly and returns zero. @@ -188,18 +180,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an MLD packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an MLD packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ipv6_mc_check_mld(struct sk_buff *skb) { int ret; @@ -211,6 +195,6 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ret < 0) return ret; - return __ipv6_mc_check_mld(skb, skb_trimmed); + return __ipv6_mc_check_mld(skb); } EXPORT_SYMBOL(ipv6_mc_check_mld); -- cgit v1.2.3 From 4b3087c7e37f9e499127201849e33960dc81da11 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Mon, 21 Jan 2019 07:26:28 +0100 Subject: bridge: Snoop Multicast Router Advertisements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple multicast routers are present in a broadcast domain then only one of them will be detectable via IGMP/MLD query snooping. The multicast router with the lowest IP address will become the selected and active querier while all other multicast routers will then refrain from sending queries. To detect such rather silent multicast routers, too, RFC4286 ("Multicast Router Discovery") provides a standardized protocol to detect multicast routers for multicast snooping switches. This patch implements the necessary MRD Advertisement message parsing and after successful processing adds such routers to the internal multicast router list. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/in.h | 5 +++++ include/net/addrconf.h | 15 +++++++++++++ include/uapi/linux/icmpv6.h | 2 ++ include/uapi/linux/igmp.h | 1 + net/bridge/br_multicast.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/mcast_snoop.c | 5 ++++- 6 files changed, 82 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index 31b493734763..435e7f2a513a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -60,6 +60,11 @@ static inline bool ipv4_is_lbcast(__be32 addr) return addr == htonl(INADDR_BROADCAST); } +static inline bool ipv4_is_all_snoopers(__be32 addr) +{ + return addr == htonl(INADDR_ALLSNOOPERS_GROUP); +} + static inline bool ipv4_is_zeronet(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x00000000); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index daf11dcb0f70..20d523ee2fec 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -229,6 +229,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_icmpv6(struct sk_buff *skb); int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); @@ -499,6 +500,20 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) #endif } +static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + __be64 *p = (__be64 *)addr; + + return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | + (p[1] ^ cpu_to_be64(0x6a))) == 0UL; +#else + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | + addr->s6_addr32[1] | addr->s6_addr32[2] | + (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; +#endif +} + #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index caf8dc019250..325395f56bfa 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -108,6 +108,8 @@ struct icmp6hdr { #define ICMPV6_MOBILE_PREFIX_SOL 146 #define ICMPV6_MOBILE_PREFIX_ADV 147 +#define ICMPV6_MRDISC_ADV 151 + /* * Codes for Destination Unreachable */ diff --git a/include/uapi/linux/igmp.h b/include/uapi/linux/igmp.h index 7e44ac02ca18..90c28bc466c6 100644 --- a/include/uapi/linux/igmp.h +++ b/include/uapi/linux/igmp.h @@ -93,6 +93,7 @@ struct igmpv3_query { #define IGMP_MTRACE_RESP 0x1e #define IGMP_MTRACE 0x1f +#define IGMP_MRDISC_ADV 0x30 /* From RFC4286 */ /* * Use the BSD names for these for compatibility diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2366f4a2780e..2c46c7aca571 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -29,10 +30,12 @@ #include #include #if IS_ENABLED(CONFIG_IPV6) +#include #include #include #include #include +#include #endif #include "br_private.h" @@ -1583,6 +1586,19 @@ static void br_multicast_pim(struct net_bridge *br, br_multicast_mark_router(br, port); } +static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + if (ip_hdr(skb)->protocol != IPPROTO_IGMP || + igmp_hdr(skb)->type != IGMP_MRDISC_ADV) + return -ENOMSG; + + br_multicast_mark_router(br, port); + + return 0; +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1600,7 +1616,15 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { if (ip_hdr(skb)->protocol == IPPROTO_PIM) br_multicast_pim(br, port, skb); + } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { + err = br_ip4_multicast_mrd_rcv(br, port, skb); + + if (err < 0 && err != -ENOMSG) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } } + return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); @@ -1635,6 +1659,27 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, } #if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + int ret; + + if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + ret = ipv6_mc_check_icmpv6(skb); + if (ret < 0) + return ret; + + if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) + return -ENOMSG; + + br_multicast_mark_router(br, port); + + return 0; +} + static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1649,6 +1694,16 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (err == -ENOMSG) { if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + + if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) { + err = br_ip6_multicast_mrd_rcv(br, port, skb); + + if (err < 0 && err != -ENOMSG) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } + } + return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index a72ddfc40eb3..55e2ac179f28 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) if (skb->len < len || len <= offset) return -EINVAL; + skb_set_transport_header(skb, offset); + return 0; } @@ -142,7 +144,7 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } -static int ipv6_mc_check_icmpv6(struct sk_buff *skb) +int ipv6_mc_check_icmpv6(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr); unsigned int transport_len = ipv6_transport_len(skb); @@ -161,6 +163,7 @@ static int ipv6_mc_check_icmpv6(struct sk_buff *skb) return 0; } +EXPORT_SYMBOL(ipv6_mc_check_icmpv6); /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet -- cgit v1.2.3 From 6815d8b09282c1df8e016bd2fabf25ada6d4462b Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 21 Jan 2019 18:41:39 +0800 Subject: ptp_qoriq: support external trigger stamp FIFO The external trigger stamp FIFO was introduced as a new feature for QorIQ 1588 timer IP block. This patch is to support it by adding a new dts property "fsl,extts-fifo". Any QorIQ 1588 timer supporting this feature is required to add this property in its dts node. In addition, the FIFO should be cleaned up before enabling external trigger interrupts. Otherwise, there will be interrupts immediately just after enabling external trigger interrupts. Signed-off-by: Yangbo Lu Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 68 ++++++++++++++++++++++++++++++++++--------- include/linux/fsl/ptp_qoriq.h | 3 ++ 2 files changed, 57 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 274321471d50..a2e7702db3a4 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -88,6 +88,49 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp) qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); } +static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, + bool update_event) +{ + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_clock_event event; + void __iomem *reg_etts_l; + void __iomem *reg_etts_h; + u32 valid, stat, lo, hi; + + switch (index) { + case 0: + valid = ETS1_VLD; + reg_etts_l = ®s->etts_regs->tmr_etts1_l; + reg_etts_h = ®s->etts_regs->tmr_etts1_h; + break; + case 1: + valid = ETS2_VLD; + reg_etts_l = ®s->etts_regs->tmr_etts2_l; + reg_etts_h = ®s->etts_regs->tmr_etts2_h; + break; + default: + return -EINVAL; + } + + event.type = PTP_CLOCK_EXTTS; + event.index = index; + + do { + lo = qoriq_read(reg_etts_l); + hi = qoriq_read(reg_etts_h); + + if (update_event) { + event.timestamp = ((u64) hi) << 32; + event.timestamp |= lo; + ptp_clock_event(qoriq_ptp->clock, &event); + } + + stat = qoriq_read(®s->ctrl_regs->tmr_stat); + } while (qoriq_ptp->extts_fifo_support && (stat & valid)); + + return 0; +} + /* * Interrupt service routine */ @@ -111,24 +154,12 @@ static irqreturn_t isr(int irq, void *priv) if (irqs & ETS1) { ack |= ETS1; - hi = qoriq_read(®s->etts_regs->tmr_etts1_h); - lo = qoriq_read(®s->etts_regs->tmr_etts1_l); - event.type = PTP_CLOCK_EXTTS; - event.index = 0; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + extts_clean_up(qoriq_ptp, 0, true); } if (irqs & ETS2) { ack |= ETS2; - hi = qoriq_read(®s->etts_regs->tmr_etts2_h); - lo = qoriq_read(®s->etts_regs->tmr_etts2_l); - event.type = PTP_CLOCK_EXTTS; - event.index = 1; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + extts_clean_up(qoriq_ptp, 1, true); } if (irqs & ALM2) { @@ -278,6 +309,10 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, default: return -EINVAL; } + + if (on) + extts_clean_up(qoriq_ptp, rq->extts.index, false); + break; case PTP_CLK_REQ_PPS: bit = PP1EN; @@ -441,6 +476,11 @@ static int qoriq_ptp_probe(struct platform_device *dev) if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) qoriq_ptp->cksel = DEFAULT_CKSEL; + if (of_property_read_bool(node, "fsl,extts-fifo")) + qoriq_ptp->extts_fifo_support = true; + else + qoriq_ptp->extts_fifo_support = false; + if (of_property_read_u32(node, "fsl,tclk-period", &qoriq_ptp->tclk_period) || of_property_read_u32(node, diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index c1f003aadcce..43b4b442f6a4 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -120,6 +120,8 @@ struct qoriq_ptp_registers { /* Bit definitions for the TMR_STAT register */ #define STAT_VEC_SHIFT (0) /* Timer general purpose status vector */ #define STAT_VEC_MASK (0x3f) +#define ETS1_VLD (1<<24) +#define ETS2_VLD (1<<25) /* Bit definitions for the TMR_PRSC register */ #define PRSC_OCK_SHIFT (0) /* Output clock division/prescale factor. */ @@ -141,6 +143,7 @@ struct qoriq_ptp { struct ptp_clock *clock; struct ptp_clock_info caps; struct resource *rsrc; + bool extts_fifo_support; int irq; int phc_index; u64 alarm_interval; /* for periodic alarm */ -- cgit v1.2.3 From 19df7510d5cf077c2e88a7690fb7617e6d341beb Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 21 Jan 2019 18:41:42 +0800 Subject: ptp: add debugfs support for ptp_qoriq This patch is to add debugfs support for ptp_qoriq. Current debugfs supports to control fiper1/fiper2 loopback mode. If the loopback mode is enabled, the fiper1/fiper2 pulse is looped back into trigger1/ trigger2 input. This is very useful for validating hardware and driver without external hardware. Below is an example to enable fiper1 loopback. echo 1 > /sys/kernel/debug/2d10e00.ptp_clock/fiper1-loopback Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 2 +- drivers/ptp/Makefile | 4 +- drivers/ptp/ptp_qoriq.c | 3 ++ drivers/ptp/ptp_qoriq_debugfs.c | 101 ++++++++++++++++++++++++++++++++++++++++ include/linux/fsl/ptp_qoriq.h | 12 +++++ 5 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 drivers/ptp/ptp_qoriq_debugfs.c (limited to 'include/linux') diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index d137c480db46..aeb4a8b2e0af 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -53,7 +53,7 @@ config PTP_1588_CLOCK_QORIQ packets using the SO_TIMESTAMPING API. To compile this driver as a module, choose M here: the module - will be called ptp_qoriq. + will be called ptp-qoriq. config PTP_1588_CLOCK_IXP46X tristate "Intel IXP46x as PTP clock" diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 19efa9cfa950..677d1d178a3e 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -9,4 +9,6 @@ obj-$(CONFIG_PTP_1588_CLOCK_DTE) += ptp_dte.o obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o -obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp_qoriq.o +obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp-qoriq.o +ptp-qoriq-y += ptp_qoriq.o +ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index a2e7702db3a4..43416b2e8a13 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -471,6 +471,7 @@ static int qoriq_ptp_probe(struct platform_device *dev) err = -EINVAL; + qoriq_ptp->dev = &dev->dev; qoriq_ptp->caps = ptp_qoriq_caps; if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) @@ -572,6 +573,7 @@ static int qoriq_ptp_probe(struct platform_device *dev) } qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock); + ptp_qoriq_create_debugfs(qoriq_ptp); platform_set_drvdata(dev, qoriq_ptp); return 0; @@ -597,6 +599,7 @@ static int qoriq_ptp_remove(struct platform_device *dev) qoriq_write(®s->ctrl_regs->tmr_temask, 0); qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + ptp_qoriq_remove_debugfs(qoriq_ptp); ptp_clock_unregister(qoriq_ptp->clock); iounmap(qoriq_ptp->base); release_resource(qoriq_ptp->rsrc); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c new file mode 100644 index 000000000000..d904332b240d --- /dev/null +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright 2019 NXP + */ +#include +#include +#include + +static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + *val = ctrl & PP1L ? 1 : 0; + + return 0; +} + +static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + if (val == 0) + ctrl &= ~PP1L; + else + ctrl |= PP1L; + + qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get, + ptp_qoriq_fiper1_lpbk_set, "%llu\n"); + +static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + *val = ctrl & PP2L ? 1 : 0; + + return 0; +} + +static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + if (val == 0) + ctrl &= ~PP2L; + else + ctrl |= PP2L; + + qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get, + ptp_qoriq_fiper2_lpbk_set, "%llu\n"); + +void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +{ + struct dentry *root; + + root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL); + if (IS_ERR(root)) + return; + if (!root) + goto err_root; + + qoriq_ptp->debugfs_root = root; + + if (!debugfs_create_file("fiper1-loopback", 0600, root, qoriq_ptp, + &ptp_qoriq_fiper1_fops)) + goto err_node; + if (!debugfs_create_file("fiper2-loopback", 0600, root, qoriq_ptp, + &ptp_qoriq_fiper2_fops)) + goto err_node; + return; + +err_node: + debugfs_remove_recursive(root); + qoriq_ptp->debugfs_root = NULL; +err_root: + dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n"); +} + +void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +{ + debugfs_remove_recursive(qoriq_ptp->debugfs_root); + qoriq_ptp->debugfs_root = NULL; +} diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 43b4b442f6a4..94e9797e434c 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -143,6 +143,8 @@ struct qoriq_ptp { struct ptp_clock *clock; struct ptp_clock_info caps; struct resource *rsrc; + struct dentry *debugfs_root; + struct device *dev; bool extts_fifo_support; int irq; int phc_index; @@ -169,4 +171,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) iowrite32be(val, addr); } +#ifdef CONFIG_DEBUG_FS +void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp); +void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp); +#else +static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +{ } +static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +{ } +#endif + #endif -- cgit v1.2.3 From 51eea52d26d4939b788b7244c28cf47e902b4c4c Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 16 Jan 2019 16:13:31 +0100 Subject: pxa2xx: replace spi_master with spi_controller It's also a slave controller driver now, calling it "master" is slightly misleading. Signed-off-by: Lubomir Rintel Acked-by: Robert Jarzmik Signed-off-by: Mark Brown --- Documentation/spi/pxa2xx | 10 +-- arch/arm/mach-pxa/cm-x255.c | 2 +- arch/arm/mach-pxa/cm-x270.c | 2 +- arch/arm/mach-pxa/corgi.c | 2 +- arch/arm/mach-pxa/devices.c | 2 +- arch/arm/mach-pxa/em-x270.c | 4 +- arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/icontrol.c | 4 +- arch/arm/mach-pxa/littleton.c | 2 +- arch/arm/mach-pxa/lubbock.c | 2 +- arch/arm/mach-pxa/magician.c | 2 +- arch/arm/mach-pxa/pcm027.c | 2 +- arch/arm/mach-pxa/poodle.c | 2 +- arch/arm/mach-pxa/spitz.c | 2 +- arch/arm/mach-pxa/stargate2.c | 6 +- arch/arm/mach-pxa/tosa.c | 2 +- arch/arm/mach-pxa/z2.c | 4 +- arch/arm/mach-pxa/zeus.c | 2 +- drivers/spi/spi-pxa2xx-dma.c | 58 +++++++-------- drivers/spi/spi-pxa2xx-pci.c | 4 +- drivers/spi/spi-pxa2xx.c | 156 ++++++++++++++++++++--------------------- drivers/spi/spi-pxa2xx.h | 4 +- include/linux/spi/pxa2xx_spi.h | 4 +- 23 files changed, 140 insertions(+), 140 deletions(-) (limited to 'include/linux') diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index 13a0b7fb192f..551325b66b23 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -21,15 +21,15 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a "platform device". The master configuration is passed to the driver via a table found in include/linux/spi/pxa2xx_spi.h: -struct pxa2xx_spi_master { +struct pxa2xx_spi_controller { u16 num_chipselect; u8 enable_dma; }; -The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of +The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of slave device (chips) attached to this SPI master. -The "pxa2xx_spi_master.enable_dma" field informs the driver that SSP DMA should +The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should be used. This caused the driver to acquire two DMA channels: rx_channel and tx_channel. The rx_channel has a higher DMA service priority the tx_channel. See the "PXA2xx Developer Manual" section "DMA Controller". @@ -51,7 +51,7 @@ static struct resource pxa_spi_nssp_resources[] = { }, }; -static struct pxa2xx_spi_master pxa_nssp_master_info = { +static struct pxa2xx_spi_controller pxa_nssp_master_info = { .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ .enable_dma = 1, /* Enables NSSP DMA */ }; @@ -206,7 +206,7 @@ DMA and PIO I/O Support ----------------------- The pxa2xx_spi driver supports both DMA and interrupt driven PIO message transfers. The driver defaults to PIO mode and DMA transfers must be enabled -by setting the "enable_dma" flag in the "pxa2xx_spi_master" structure. The DMA +by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure. The DMA mode supports both coherent and stream based DMA mappings. The following logic is used to determine the type of I/O to be used on diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c index fa8e7dd4d898..4401dfcd7e68 100644 --- a/arch/arm/mach-pxa/cm-x255.c +++ b/arch/arm/mach-pxa/cm-x255.c @@ -98,7 +98,7 @@ static unsigned long cmx255_pin_config[] = { }; #if defined(CONFIG_SPI_PXA2XX) -static struct pxa2xx_spi_master pxa_ssp_master_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index f7081a50dc67..279eeca7add0 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -313,7 +313,7 @@ static inline void cmx270_init_mmc(void) {} #endif #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE) -static struct pxa2xx_spi_master cm_x270_spi_info = { +static struct pxa2xx_spi_controller cm_x270_spi_info = { .num_chipselect = 1, .enable_dma = 1, }; diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index c9732cace5e3..7ecf559bd71c 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -530,7 +530,7 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { }; #if IS_ENABLED(CONFIG_SPI_PXA2XX) -static struct pxa2xx_spi_master corgi_spi_info = { +static struct pxa2xx_spi_controller corgi_spi_info = { .num_chipselect = 3, }; diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index a24783a03827..524d6093e0c7 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -1065,7 +1065,7 @@ struct platform_device pxa93x_device_gpio = { /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1. * See comment in arch/arm/mach-pxa/ssp.c::ssp_probe() */ -void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info) +void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info) { struct platform_device *pd; diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 32c1edeb3f14..5e372760f16e 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -689,7 +689,7 @@ static inline void em_x270_init_lcd(void) {} #endif #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE) -static struct pxa2xx_spi_master em_x270_spi_info = { +static struct pxa2xx_spi_controller em_x270_spi_info = { .num_chipselect = 1, }; @@ -703,7 +703,7 @@ static struct tdo24m_platform_data em_x270_tdo24m_pdata = { .model = TDO35S, }; -static struct pxa2xx_spi_master em_x270_spi_2_info = { +static struct pxa2xx_spi_controller em_x270_spi_2_info = { .num_chipselect = 1, .enable_dma = 1, }; diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index b79b757fdd41..c3b47557b3f2 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -629,7 +629,7 @@ static struct spi_board_info tsc2046_board_info[] __initdata = { }, }; -static struct pxa2xx_spi_master pxa_ssp2_master_info = { +static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .num_chipselect = 1, .enable_dma = 1, }; diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index cbaf4f6edcda..7e30452e3840 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -115,12 +115,12 @@ static struct spi_board_info mcp251x_board_info[] = { } }; -static struct pxa2xx_spi_master pxa_ssp3_spi_master_info = { +static struct pxa2xx_spi_controller pxa_ssp3_spi_master_info = { .num_chipselect = 2, .enable_dma = 1 }; -static struct pxa2xx_spi_master pxa_ssp4_spi_master_info = { +static struct pxa2xx_spi_controller pxa_ssp4_spi_master_info = { .num_chipselect = 2, .enable_dma = 1 }; diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 39db4898dc4a..464b8bd2bcb9 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -191,7 +191,7 @@ static inline void littleton_init_lcd(void) {}; #endif /* CONFIG_FB_PXA || CONFIG_FB_PXA_MODULE */ #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE) -static struct pxa2xx_spi_master littleton_spi_info = { +static struct pxa2xx_spi_controller littleton_spi_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index a1391e113ef4..c1bd0d544981 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -197,7 +197,7 @@ static struct platform_device sa1111_device = { * (to J5) and poking board registers (as done below). Else it's only useful * for the temperature sensors. */ -static struct pxa2xx_spi_master pxa_ssp_master_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 08b079653c3f..618bcff4cdc9 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -932,7 +932,7 @@ struct pxa2xx_spi_chip tsc2046_chip_info = { .gpio_cs = GPIO14_MAGICIAN_TSC2046_CS, }; -static struct pxa2xx_spi_master magician_spi_info = { +static struct pxa2xx_spi_controller magician_spi_info = { .num_chipselect = 1, .enable_dma = 1, }; diff --git a/arch/arm/mach-pxa/pcm027.c b/arch/arm/mach-pxa/pcm027.c index ccca9f7575c3..e2e613449660 100644 --- a/arch/arm/mach-pxa/pcm027.c +++ b/arch/arm/mach-pxa/pcm027.c @@ -132,7 +132,7 @@ static struct platform_device smc91x_device = { /* * SPI host and devices */ -static struct pxa2xx_spi_master pxa_ssp_master_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index c2a43d4cfd3e..9450a523cd0b 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -196,7 +196,7 @@ struct platform_device poodle_locomo_device = { EXPORT_SYMBOL(poodle_locomo_device); #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE) -static struct pxa2xx_spi_master poodle_spi_info = { +static struct pxa2xx_spi_controller poodle_spi_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 306818e2cf54..8dac824a85df 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -572,7 +572,7 @@ static struct spi_board_info spitz_spi_devices[] = { }, }; -static struct pxa2xx_spi_master spitz_spi_info = { +static struct pxa2xx_spi_controller spitz_spi_info = { .num_chipselect = 3, }; diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index e0d6c872270a..c28d19b126a7 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -337,15 +337,15 @@ static struct platform_device stargate2_flash_device = { .num_resources = 1, }; -static struct pxa2xx_spi_master pxa_ssp_master_0_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_0_info = { .num_chipselect = 1, }; -static struct pxa2xx_spi_master pxa_ssp_master_1_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_1_info = { .num_chipselect = 1, }; -static struct pxa2xx_spi_master pxa_ssp_master_2_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_2_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index e8a93c088c35..7439798d58e4 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -813,7 +813,7 @@ static struct platform_device tosa_bt_device = { .dev.platform_data = &tosa_bt_data, }; -static struct pxa2xx_spi_master pxa_ssp_master_info = { +static struct pxa2xx_spi_controller pxa_ssp_master_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index e2353e75bb28..ad082e11e2a4 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -607,12 +607,12 @@ static struct spi_board_info spi_board_info[] __initdata = { }, }; -static struct pxa2xx_spi_master pxa_ssp1_master_info = { +static struct pxa2xx_spi_controller pxa_ssp1_master_info = { .num_chipselect = 1, .enable_dma = 1, }; -static struct pxa2xx_spi_master pxa_ssp2_master_info = { +static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .num_chipselect = 1, }; diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index c411f79d4cb5..bdbcf46595f9 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -391,7 +391,7 @@ static struct platform_device zeus_sram_device = { }; /* SPI interface on SSP3 */ -static struct pxa2xx_spi_master pxa2xx_spi_ssp3_master_info = { +static struct pxa2xx_spi_controller pxa2xx_spi_ssp3_master_info = { .num_chipselect = 1, .enable_dma = 1, }; diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 2fa7f4b43492..15592598273e 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -23,7 +23,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, bool error) { - struct spi_message *msg = drv_data->master->cur_msg; + struct spi_message *msg = drv_data->controller->cur_msg; /* * It is possible that one CPU is handling ROR interrupt and other @@ -59,7 +59,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, msg->status = -EIO; } - spi_finalize_current_transfer(drv_data->master); + spi_finalize_current_transfer(drv_data->controller); } } @@ -74,7 +74,7 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data, struct spi_transfer *xfer) { struct chip_data *chip = - spi_get_ctldata(drv_data->master->cur_msg->spi); + spi_get_ctldata(drv_data->controller->cur_msg->spi); enum dma_slave_buswidth width; struct dma_slave_config cfg; struct dma_chan *chan; @@ -102,14 +102,14 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data, cfg.dst_maxburst = chip->dma_burst_size; sgt = &xfer->tx_sg; - chan = drv_data->master->dma_tx; + chan = drv_data->controller->dma_tx; } else { cfg.src_addr = drv_data->ssdr_physical; cfg.src_addr_width = width; cfg.src_maxburst = chip->dma_burst_size; sgt = &xfer->rx_sg; - chan = drv_data->master->dma_rx; + chan = drv_data->controller->dma_rx; } ret = dmaengine_slave_config(chan, &cfg); @@ -130,8 +130,8 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) if (status & SSSR_ROR) { dev_err(&drv_data->pdev->dev, "FIFO overrun\n"); - dmaengine_terminate_async(drv_data->master->dma_rx); - dmaengine_terminate_async(drv_data->master->dma_tx); + dmaengine_terminate_async(drv_data->controller->dma_rx); + dmaengine_terminate_async(drv_data->controller->dma_tx); pxa2xx_spi_dma_transfer_complete(drv_data, true); return IRQ_HANDLED; @@ -171,15 +171,15 @@ int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, return 0; err_rx: - dmaengine_terminate_async(drv_data->master->dma_tx); + dmaengine_terminate_async(drv_data->controller->dma_tx); err_tx: return err; } void pxa2xx_spi_dma_start(struct driver_data *drv_data) { - dma_async_issue_pending(drv_data->master->dma_rx); - dma_async_issue_pending(drv_data->master->dma_tx); + dma_async_issue_pending(drv_data->controller->dma_rx); + dma_async_issue_pending(drv_data->controller->dma_tx); atomic_set(&drv_data->dma_running, 1); } @@ -187,30 +187,30 @@ void pxa2xx_spi_dma_start(struct driver_data *drv_data) void pxa2xx_spi_dma_stop(struct driver_data *drv_data) { atomic_set(&drv_data->dma_running, 0); - dmaengine_terminate_sync(drv_data->master->dma_rx); - dmaengine_terminate_sync(drv_data->master->dma_tx); + dmaengine_terminate_sync(drv_data->controller->dma_rx); + dmaengine_terminate_sync(drv_data->controller->dma_tx); } int pxa2xx_spi_dma_setup(struct driver_data *drv_data) { - struct pxa2xx_spi_master *pdata = drv_data->master_info; + struct pxa2xx_spi_controller *pdata = drv_data->controller_info; struct device *dev = &drv_data->pdev->dev; - struct spi_controller *master = drv_data->master; + struct spi_controller *controller = drv_data->controller; dma_cap_mask_t mask; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - master->dma_tx = dma_request_slave_channel_compat(mask, + controller->dma_tx = dma_request_slave_channel_compat(mask, pdata->dma_filter, pdata->tx_param, dev, "tx"); - if (!master->dma_tx) + if (!controller->dma_tx) return -ENODEV; - master->dma_rx = dma_request_slave_channel_compat(mask, + controller->dma_rx = dma_request_slave_channel_compat(mask, pdata->dma_filter, pdata->rx_param, dev, "rx"); - if (!master->dma_rx) { - dma_release_channel(master->dma_tx); - master->dma_tx = NULL; + if (!controller->dma_rx) { + dma_release_channel(controller->dma_tx); + controller->dma_tx = NULL; return -ENODEV; } @@ -219,17 +219,17 @@ int pxa2xx_spi_dma_setup(struct driver_data *drv_data) void pxa2xx_spi_dma_release(struct driver_data *drv_data) { - struct spi_controller *master = drv_data->master; + struct spi_controller *controller = drv_data->controller; - if (master->dma_rx) { - dmaengine_terminate_sync(master->dma_rx); - dma_release_channel(master->dma_rx); - master->dma_rx = NULL; + if (controller->dma_rx) { + dmaengine_terminate_sync(controller->dma_rx); + dma_release_channel(controller->dma_rx); + controller->dma_rx = NULL; } - if (master->dma_tx) { - dmaengine_terminate_sync(master->dma_tx); - dma_release_channel(master->dma_tx); - master->dma_tx = NULL; + if (controller->dma_tx) { + dmaengine_terminate_sync(controller->dma_tx); + dma_release_channel(controller->dma_tx); + controller->dma_tx = NULL; } } diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 869f188b02eb..1727fdfbac28 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -197,7 +197,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, struct platform_device_info pi; int ret; struct platform_device *pdev; - struct pxa2xx_spi_master spi_pdata; + struct pxa2xx_spi_controller spi_pdata; struct ssp_device *ssp; struct pxa_spi_info *c; char buf[40]; @@ -265,7 +265,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, static void pxa2xx_spi_pci_remove(struct pci_dev *dev) { struct platform_device *pdev = pci_get_drvdata(dev); - struct pxa2xx_spi_master *spi_pdata; + struct pxa2xx_spi_controller *spi_pdata; spi_pdata = dev_get_platdata(&pdev->dev); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index d84b893a64d7..69e874a2ad1e 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -328,7 +328,7 @@ static void lpss_ssp_setup(struct driver_data *drv_data) __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); /* Enable multiblock DMA transfers */ - if (drv_data->master_info->enable_dma) { + if (drv_data->controller_info->enable_dma) { __lpss_ssp_write_priv(drv_data, config->reg_ssp, 1); if (config->reg_general >= 0) { @@ -368,7 +368,7 @@ static void lpss_ssp_select_cs(struct spi_device *spi, __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); ndelay(1000000000 / - (drv_data->master->max_speed_hz / 2)); + (drv_data->controller->max_speed_hz / 2)); } } @@ -567,7 +567,7 @@ static int u32_reader(struct driver_data *drv_data) static void reset_sccr1(struct driver_data *drv_data) { struct chip_data *chip = - spi_get_ctldata(drv_data->master->cur_msg->spi); + spi_get_ctldata(drv_data->controller->cur_msg->spi); u32 sccr1_reg; sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1; @@ -599,8 +599,8 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg) dev_err(&drv_data->pdev->dev, "%s\n", msg); - drv_data->master->cur_msg->status = -EIO; - spi_finalize_current_transfer(drv_data->master); + drv_data->controller->cur_msg->status = -EIO; + spi_finalize_current_transfer(drv_data->controller); } static void int_transfer_complete(struct driver_data *drv_data) @@ -611,7 +611,7 @@ static void int_transfer_complete(struct driver_data *drv_data) if (!pxa25x_ssp_comp(drv_data)) pxa2xx_spi_write(drv_data, SSTO, 0); - spi_finalize_current_transfer(drv_data->master); + spi_finalize_current_transfer(drv_data->controller); } static irqreturn_t interrupt_transfer(struct driver_data *drv_data) @@ -747,7 +747,7 @@ static irqreturn_t ssp_int(int irq, void *dev_id) pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg & ~drv_data->int_cr1); pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg); - if (!drv_data->master->cur_msg) { + if (!drv_data->controller->cur_msg) { handle_bad_msg(drv_data); /* Never fail */ return IRQ_HANDLED; @@ -879,7 +879,7 @@ static unsigned int quark_x1000_get_clk_div(int rate, u32 *dds) static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate) { - unsigned long ssp_clk = drv_data->master->max_speed_hz; + unsigned long ssp_clk = drv_data->controller->max_speed_hz; const struct ssp_device *ssp = drv_data->ssp; rate = min_t(int, ssp_clk, rate); @@ -894,7 +894,7 @@ static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data, int rate) { struct chip_data *chip = - spi_get_ctldata(drv_data->master->cur_msg->spi); + spi_get_ctldata(drv_data->controller->cur_msg->spi); unsigned int clk_div; switch (drv_data->ssp_type) { @@ -908,7 +908,7 @@ static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data, return clk_div << 8; } -static bool pxa2xx_spi_can_dma(struct spi_controller *master, +static bool pxa2xx_spi_can_dma(struct spi_controller *controller, struct spi_device *spi, struct spi_transfer *xfer) { @@ -919,12 +919,12 @@ static bool pxa2xx_spi_can_dma(struct spi_controller *master, xfer->len >= chip->dma_burst_size; } -static int pxa2xx_spi_transfer_one(struct spi_controller *master, +static int pxa2xx_spi_transfer_one(struct spi_controller *controller, struct spi_device *spi, struct spi_transfer *transfer) { - struct driver_data *drv_data = spi_controller_get_devdata(master); - struct spi_message *message = master->cur_msg; + struct driver_data *drv_data = spi_controller_get_devdata(controller); + struct spi_message *message = controller->cur_msg; struct chip_data *chip = spi_get_ctldata(message->spi); u32 dma_thresh = chip->dma_threshold; u32 dma_burst = chip->dma_burst_size; @@ -1006,9 +1006,9 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, "DMA burst size reduced to match bits_per_word\n"); } - dma_mapped = master->can_dma && - master->can_dma(master, message->spi, transfer) && - master->cur_msg_mapped; + dma_mapped = controller->can_dma && + controller->can_dma(controller, message->spi, transfer) && + controller->cur_msg_mapped; if (dma_mapped) { /* Ensure we have the correct interrupt handler */ @@ -1036,12 +1036,12 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, cr0 = pxa2xx_configure_sscr0(drv_data, clk_div, bits); if (!pxa25x_ssp_comp(drv_data)) dev_dbg(&message->spi->dev, "%u Hz actual, %s\n", - master->max_speed_hz + controller->max_speed_hz / (1 + ((cr0 & SSCR0_SCR(0xfff)) >> 8)), dma_mapped ? "DMA" : "PIO"); else dev_dbg(&message->spi->dev, "%u Hz actual, %s\n", - master->max_speed_hz / 2 + controller->max_speed_hz / 2 / (1 + ((cr0 & SSCR0_SCR(0x0ff)) >> 8)), dma_mapped ? "DMA" : "PIO"); @@ -1092,7 +1092,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, } } - if (spi_controller_is_slave(master)) { + if (spi_controller_is_slave(controller)) { while (drv_data->write(drv_data)) ; if (drv_data->gpiod_ready) { @@ -1111,9 +1111,9 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, return 1; } -static int pxa2xx_spi_slave_abort(struct spi_master *master) +static int pxa2xx_spi_slave_abort(struct spi_controller *controller) { - struct driver_data *drv_data = spi_controller_get_devdata(master); + struct driver_data *drv_data = spi_controller_get_devdata(controller); /* Stop and reset SSP */ write_SSSR_CS(drv_data, drv_data->clear_sr); @@ -1126,16 +1126,16 @@ static int pxa2xx_spi_slave_abort(struct spi_master *master) dev_dbg(&drv_data->pdev->dev, "transfer aborted\n"); - drv_data->master->cur_msg->status = -EINTR; - spi_finalize_current_transfer(drv_data->master); + drv_data->controller->cur_msg->status = -EINTR; + spi_finalize_current_transfer(drv_data->controller); return 0; } -static void pxa2xx_spi_handle_err(struct spi_controller *master, +static void pxa2xx_spi_handle_err(struct spi_controller *controller, struct spi_message *msg) { - struct driver_data *drv_data = spi_controller_get_devdata(master); + struct driver_data *drv_data = spi_controller_get_devdata(controller); /* Disable the SSP */ pxa2xx_spi_write(drv_data, SSCR0, @@ -1159,9 +1159,9 @@ static void pxa2xx_spi_handle_err(struct spi_controller *master, pxa2xx_spi_dma_stop(drv_data); } -static int pxa2xx_spi_unprepare_transfer(struct spi_controller *master) +static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller) { - struct driver_data *drv_data = spi_controller_get_devdata(master); + struct driver_data *drv_data = spi_controller_get_devdata(controller); /* Disable the SSP now */ pxa2xx_spi_write(drv_data, SSCR0, @@ -1260,7 +1260,7 @@ static int setup(struct spi_device *spi) break; default: tx_hi_thres = 0; - if (spi_controller_is_slave(drv_data->master)) { + if (spi_controller_is_slave(drv_data->controller)) { tx_thres = 1; rx_thres = 2; } else { @@ -1287,7 +1287,7 @@ static int setup(struct spi_device *spi) chip->frm = spi->chip_select; } - chip->enable_dma = drv_data->master_info->enable_dma; + chip->enable_dma = drv_data->controller_info->enable_dma; chip->timeout = TIMOUT_DFLT; } @@ -1310,7 +1310,7 @@ static int setup(struct spi_device *spi) if (chip_info->enable_loopback) chip->cr1 = SSCR1_LBM; } - if (spi_controller_is_slave(drv_data->master)) { + if (spi_controller_is_slave(drv_data->controller)) { chip->cr1 |= SSCR1_SCFR; chip->cr1 |= SSCR1_SCLKDIR; chip->cr1 |= SSCR1_SFRMDIR; @@ -1497,10 +1497,10 @@ static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param) #endif /* CONFIG_PCI */ -static struct pxa2xx_spi_master * +static struct pxa2xx_spi_controller * pxa2xx_spi_init_pdata(struct platform_device *pdev) { - struct pxa2xx_spi_master *pdata; + struct pxa2xx_spi_controller *pdata; struct acpi_device *adev; struct ssp_device *ssp; struct resource *res; @@ -1568,10 +1568,10 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) return pdata; } -static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master, +static int pxa2xx_spi_fw_translate_cs(struct spi_controller *controller, unsigned int cs) { - struct driver_data *drv_data = spi_controller_get_devdata(master); + struct driver_data *drv_data = spi_controller_get_devdata(controller); if (has_acpi_companion(&drv_data->pdev->dev)) { switch (drv_data->ssp_type) { @@ -1595,8 +1595,8 @@ static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master, static int pxa2xx_spi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct pxa2xx_spi_master *platform_info; - struct spi_controller *master; + struct pxa2xx_spi_controller *platform_info; + struct spi_controller *controller; struct driver_data *drv_data; struct ssp_device *ssp; const struct lpss_config *config; @@ -1622,37 +1622,37 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) } if (platform_info->is_slave) - master = spi_alloc_slave(dev, sizeof(struct driver_data)); + controller = spi_alloc_slave(dev, sizeof(struct driver_data)); else - master = spi_alloc_master(dev, sizeof(struct driver_data)); + controller = spi_alloc_master(dev, sizeof(struct driver_data)); - if (!master) { - dev_err(&pdev->dev, "cannot alloc spi_master\n"); + if (!controller) { + dev_err(&pdev->dev, "cannot alloc spi_controller\n"); pxa_ssp_free(ssp); return -ENOMEM; } - drv_data = spi_controller_get_devdata(master); - drv_data->master = master; - drv_data->master_info = platform_info; + drv_data = spi_controller_get_devdata(controller); + drv_data->controller = controller; + drv_data->controller_info = platform_info; drv_data->pdev = pdev; drv_data->ssp = ssp; - master->dev.of_node = pdev->dev.of_node; + controller->dev.of_node = pdev->dev.of_node; /* the spi->mode bits understood by this driver: */ - master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; - - master->bus_num = ssp->port_id; - master->dma_alignment = DMA_ALIGNMENT; - master->cleanup = cleanup; - master->setup = setup; - master->set_cs = pxa2xx_spi_set_cs; - master->transfer_one = pxa2xx_spi_transfer_one; - master->slave_abort = pxa2xx_spi_slave_abort; - master->handle_err = pxa2xx_spi_handle_err; - master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer; - master->fw_translate_cs = pxa2xx_spi_fw_translate_cs; - master->auto_runtime_pm = true; - master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX; + controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; + + controller->bus_num = ssp->port_id; + controller->dma_alignment = DMA_ALIGNMENT; + controller->cleanup = cleanup; + controller->setup = setup; + controller->set_cs = pxa2xx_spi_set_cs; + controller->transfer_one = pxa2xx_spi_transfer_one; + controller->slave_abort = pxa2xx_spi_slave_abort; + controller->handle_err = pxa2xx_spi_handle_err; + controller->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer; + controller->fw_translate_cs = pxa2xx_spi_fw_translate_cs; + controller->auto_runtime_pm = true; + controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX; drv_data->ssp_type = ssp->type; @@ -1661,10 +1661,10 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) if (pxa25x_ssp_comp(drv_data)) { switch (drv_data->ssp_type) { case QUARK_X1000_SSP: - master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); + controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); break; default: - master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16); + controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16); break; } @@ -1673,7 +1673,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data->clear_sr = SSSR_ROR; drv_data->mask_sr = SSSR_RFS | SSSR_TFS | SSSR_ROR; } else { - master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); + controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE; drv_data->dma_cr1 = DEFAULT_DMA_CR1; drv_data->clear_sr = SSSR_ROR | SSSR_TINT; @@ -1685,7 +1685,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data); if (status < 0) { dev_err(&pdev->dev, "cannot get IRQ %d\n", ssp->irq); - goto out_error_master_alloc; + goto out_error_controller_alloc; } /* Setup DMA if requested */ @@ -1695,7 +1695,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) dev_dbg(dev, "no DMA channels available, using PIO\n"); platform_info->enable_dma = false; } else { - master->can_dma = pxa2xx_spi_can_dma; + controller->can_dma = pxa2xx_spi_can_dma; } } @@ -1704,7 +1704,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) if (status) goto out_error_dma_irq_alloc; - master->max_speed_hz = clk_get_rate(ssp->clk); + controller->max_speed_hz = clk_get_rate(ssp->clk); /* Load default SSP configuration */ pxa2xx_spi_write(drv_data, SSCR0, 0); @@ -1727,7 +1727,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) break; default: - if (spi_controller_is_slave(master)) { + if (spi_controller_is_slave(controller)) { tmp = SSCR1_SCFR | SSCR1_SCLKDIR | SSCR1_SFRMDIR | @@ -1740,7 +1740,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) } pxa2xx_spi_write(drv_data, SSCR1, tmp); tmp = SSCR0_Motorola | SSCR0_DataSize(8); - if (!spi_controller_is_slave(master)) + if (!spi_controller_is_slave(controller)) tmp |= SSCR0_SCR(2); pxa2xx_spi_write(drv_data, SSCR0, tmp); break; @@ -1765,24 +1765,24 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) platform_info->num_chipselect = config->cs_num; } } - master->num_chipselect = platform_info->num_chipselect; + controller->num_chipselect = platform_info->num_chipselect; count = gpiod_count(&pdev->dev, "cs"); if (count > 0) { int i; - master->num_chipselect = max_t(int, count, - master->num_chipselect); + controller->num_chipselect = max_t(int, count, + controller->num_chipselect); drv_data->cs_gpiods = devm_kcalloc(&pdev->dev, - master->num_chipselect, sizeof(struct gpio_desc *), + controller->num_chipselect, sizeof(struct gpio_desc *), GFP_KERNEL); if (!drv_data->cs_gpiods) { status = -ENOMEM; goto out_error_clock_enabled; } - for (i = 0; i < master->num_chipselect; i++) { + for (i = 0; i < controller->num_chipselect; i++) { struct gpio_desc *gpiod; gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS); @@ -1815,9 +1815,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) /* Register with the SPI framework */ platform_set_drvdata(pdev, drv_data); - status = devm_spi_register_controller(&pdev->dev, master); + status = devm_spi_register_controller(&pdev->dev, controller); if (status != 0) { - dev_err(&pdev->dev, "problem registering spi master\n"); + dev_err(&pdev->dev, "problem registering spi controller\n"); goto out_error_clock_enabled; } @@ -1832,8 +1832,8 @@ out_error_dma_irq_alloc: pxa2xx_spi_dma_release(drv_data); free_irq(ssp->irq, drv_data); -out_error_master_alloc: - spi_controller_put(master); +out_error_controller_alloc: + spi_controller_put(controller); pxa_ssp_free(ssp); return status; } @@ -1854,7 +1854,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev) clk_disable_unprepare(ssp->clk); /* Release DMA */ - if (drv_data->master_info->enable_dma) + if (drv_data->controller_info->enable_dma) pxa2xx_spi_dma_release(drv_data); pm_runtime_put_noidle(&pdev->dev); @@ -1876,7 +1876,7 @@ static int pxa2xx_spi_suspend(struct device *dev) struct ssp_device *ssp = drv_data->ssp; int status; - status = spi_controller_suspend(drv_data->master); + status = spi_controller_suspend(drv_data->controller); if (status != 0) return status; pxa2xx_spi_write(drv_data, SSCR0, 0); @@ -1901,7 +1901,7 @@ static int pxa2xx_spi_resume(struct device *dev) } /* Start the queue running */ - return spi_controller_resume(drv_data->master); + return spi_controller_resume(drv_data->controller); } #endif diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 4e324da66ef7..aba777b4502d 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -31,10 +31,10 @@ struct driver_data { /* SPI framework hookup */ enum pxa_ssp_type ssp_type; - struct spi_controller *master; + struct spi_controller *controller; /* PXA hookup */ - struct pxa2xx_spi_master *master_info; + struct pxa2xx_spi_controller *controller_info; /* SSP register addresses */ void __iomem *ioaddr; diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index b0674e330ef6..c1c59473cef9 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -22,7 +22,7 @@ struct dma_chan; /* device.platform_data for SSP controller devices */ -struct pxa2xx_spi_master { +struct pxa2xx_spi_controller { u16 num_chipselect; u8 enable_dma; bool is_slave; @@ -54,7 +54,7 @@ struct pxa2xx_spi_chip { #include -extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info); +extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info); #endif #endif -- cgit v1.2.3 From a2d21848d9211dad5e786aa7368709ca8938834e Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 22 Jan 2019 11:42:24 +0200 Subject: regmap: regmap-irq: Add main status register support There is bunch of devices with multiple logical blocks which can generate interrupts. It's not a rare case that the interrupt reason registers are arranged so that there is own status/ack/mask register for each logical block. In some devices there is also a 'main interrupt register(s)' which can indicate what sub blocks have interrupts pending. When such a device is connected via slow bus like i2c the main part of interrupt handling latency can be caused by bus accesses. On systems where it is expected that only one (or few) sub blocks have active interrupts we can reduce the latency by only reading the main register and those sub registers which have active interrupts. Support this with regmap-irq for simple cases where main register does not require acking or masking. Signed-off-by: Matti Vaittinen Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 99 ++++++++++++++++++++++++++++++++++++++-- include/linux/regmap.h | 31 +++++++++++++ 2 files changed, 126 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 1bd1145ad8b5..22778e9501ff 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -35,6 +35,7 @@ struct regmap_irq_chip_data { int wake_count; void *status_reg_buf; + unsigned int *main_status_buf; unsigned int *status_buf; unsigned int *mask_buf; unsigned int *mask_buf_def; @@ -326,6 +327,33 @@ static const struct irq_chip regmap_irq_chip = { .irq_set_wake = regmap_irq_set_wake, }; +static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, + unsigned int b) +{ + const struct regmap_irq_chip *chip = data->chip; + struct regmap *map = data->map; + struct regmap_irq_sub_irq_map *subreg; + int i, ret = 0; + + if (!chip->sub_reg_offsets) { + /* Assume linear mapping */ + ret = regmap_read(map, chip->status_base + + (b * map->reg_stride * data->irq_reg_stride), + &data->status_buf[b]); + } else { + subreg = &chip->sub_reg_offsets[b]; + for (i = 0; i < subreg->num_regs; i++) { + unsigned int offset = subreg->offset[i]; + + ret = regmap_read(map, chip->status_base + offset, + &data->status_buf[offset]); + if (ret) + break; + } + } + return ret; +} + static irqreturn_t regmap_irq_thread(int irq, void *d) { struct regmap_irq_chip_data *data = d; @@ -349,11 +377,65 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) } /* - * Read in the statuses, using a single bulk read if possible - * in order to reduce the I/O overheads. + * Read only registers with active IRQs if the chip has 'main status + * register'. Else read in the statuses, using a single bulk read if + * possible in order to reduce the I/O overheads. */ - if (!map->use_single_read && map->reg_stride == 1 && - data->irq_reg_stride == 1) { + + if (chip->num_main_regs) { + unsigned int max_main_bits; + unsigned long size; + + size = chip->num_regs * sizeof(unsigned int); + + max_main_bits = (chip->num_main_status_bits) ? + chip->num_main_status_bits : chip->num_regs; + /* Clear the status buf as we don't read all status regs */ + memset(data->status_buf, 0, size); + + /* We could support bulk read for main status registers + * but I don't expect to see devices with really many main + * status registers so let's only support single reads for the + * sake of simplicity. and add bulk reads only if needed + */ + for (i = 0; i < chip->num_main_regs; i++) { + ret = regmap_read(map, chip->main_status + + (i * map->reg_stride + * data->irq_reg_stride), + &data->main_status_buf[i]); + if (ret) { + dev_err(map->dev, + "Failed to read IRQ status %d\n", + ret); + goto exit; + } + } + + /* Read sub registers with active IRQs */ + for (i = 0; i < chip->num_main_regs; i++) { + unsigned int b; + const unsigned long mreg = data->main_status_buf[i]; + + for_each_set_bit(b, &mreg, map->format.val_bytes * 8) { + if (i * map->format.val_bytes * 8 + b > + max_main_bits) + break; + ret = read_sub_irq_data(data, b); + + if (ret != 0) { + dev_err(map->dev, + "Failed to read IRQ status %d\n", + ret); + if (chip->runtime_pm) + pm_runtime_put(map->dev); + goto exit; + } + } + + } + } else if (!map->use_single_read && map->reg_stride == 1 && + data->irq_reg_stride == 1) { + u8 *buf8 = data->status_reg_buf; u16 *buf16 = data->status_reg_buf; u32 *buf32 = data->status_reg_buf; @@ -518,6 +600,15 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, if (!d) return -ENOMEM; + if (chip->num_main_regs) { + d->main_status_buf = kcalloc(chip->num_main_regs, + sizeof(unsigned int), + GFP_KERNEL); + + if (!d->main_status_buf) + goto err_alloc; + } + d->status_buf = kcalloc(chip->num_regs, sizeof(unsigned int), GFP_KERNEL); if (!d->status_buf) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 1781b6cb793c..daeec7dbd65c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1131,11 +1131,37 @@ struct regmap_irq { .reg_offset = (_id) / (_reg_bits), \ } +#define REGMAP_IRQ_MAIN_REG_OFFSET(arr) \ + { .num_regs = ARRAY_SIZE((arr)), .offset = &(arr)[0] } + +struct regmap_irq_sub_irq_map { + unsigned int num_regs; + unsigned int *offset; +}; + /** * struct regmap_irq_chip - Description of a generic regmap irq_chip. * * @name: Descriptive name for IRQ controller. * + * @main_status: Base main status register address. For chips which have + * interrupts arranged in separate sub-irq blocks with own IRQ + * registers and which have a main IRQ registers indicating + * sub-irq blocks with unhandled interrupts. For such chips fill + * sub-irq register information in status_base, mask_base and + * ack_base. + * @num_main_status_bits: Should be given to chips where number of meaningfull + * main status bits differs from num_regs. + * @sub_reg_offsets: arrays of mappings from main register bits to sub irq + * registers. First item in array describes the registers + * for first main status bit. Second array for second bit etc. + * Offset is given as sub register status offset to + * status_base. Should contain num_regs arrays. + * Can be provided for chips with more complex mapping than + * 1.st bit to 1.st sub-reg, 2.nd bit to 2.nd sub-reg, ... + * @num_main_regs: Number of 'main status' irq registers for chips which have + * main_status set. + * * @status_base: Base status register address. * @mask_base: Base mask register address. * @mask_writeonly: Base mask register is write only. @@ -1181,6 +1207,11 @@ struct regmap_irq { struct regmap_irq_chip { const char *name; + unsigned int main_status; + unsigned int num_main_status_bits; + struct regmap_irq_sub_irq_map *sub_reg_offsets; + int num_main_regs; + unsigned int status_base; unsigned int mask_base; unsigned int unmask_base; -- cgit v1.2.3 From f0125f1a559be1033055f44e511174aaa75b60cc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 23 Jan 2019 17:29:53 +0000 Subject: spi: Go back to immediate teardown Commit 412e6037324 ("spi: core: avoid waking pump thread from spi_sync instead run teardown delayed") introduced regressions on some boards, apparently connected to spi_mem not triggering shutdown properly any more. Since we've thus far been unable to figure out exactly where the breakage is revert the optimisation for now. Reported-by: Jon Hunter Signed-off-by: Mark Brown Cc: kernel@martin.sperl.org --- drivers/spi/spi.c | 122 +++++++++++++----------------------------------- include/linux/spi/spi.h | 2 - 2 files changed, 33 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 06b9139664a3..13f447a67d67 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1225,7 +1225,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) return; } - /* If another context is idling the device then defer to kthread */ + /* If another context is idling the device then defer */ if (ctlr->idling) { kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); @@ -1239,10 +1239,34 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) return; } - /* schedule idle teardown with a delay of 1 second */ - kthread_mod_delayed_work(&ctlr->kworker, - &ctlr->pump_idle_teardown, - HZ); + /* Only do teardown in the thread */ + if (!in_kthread) { + kthread_queue_work(&ctlr->kworker, + &ctlr->pump_messages); + spin_unlock_irqrestore(&ctlr->queue_lock, flags); + return; + } + + ctlr->busy = false; + ctlr->idling = true; + spin_unlock_irqrestore(&ctlr->queue_lock, flags); + + kfree(ctlr->dummy_rx); + ctlr->dummy_rx = NULL; + kfree(ctlr->dummy_tx); + ctlr->dummy_tx = NULL; + if (ctlr->unprepare_transfer_hardware && + ctlr->unprepare_transfer_hardware(ctlr)) + dev_err(&ctlr->dev, + "failed to unprepare transfer hardware\n"); + if (ctlr->auto_runtime_pm) { + pm_runtime_mark_last_busy(ctlr->dev.parent); + pm_runtime_put_autosuspend(ctlr->dev.parent); + } + trace_spi_controller_idle(ctlr); + + spin_lock_irqsave(&ctlr->queue_lock, flags); + ctlr->idling = false; spin_unlock_irqrestore(&ctlr->queue_lock, flags); return; } @@ -1335,77 +1359,6 @@ static void spi_pump_messages(struct kthread_work *work) __spi_pump_messages(ctlr, true); } -/** - * spi_pump_idle_teardown - kthread delayed work function which tears down - * the controller settings after some delay - * @work: pointer to kthread work struct contained in the controller struct - */ -static void spi_pump_idle_teardown(struct kthread_work *work) -{ - struct spi_controller *ctlr = - container_of(work, struct spi_controller, - pump_idle_teardown.work); - unsigned long flags; - - /* Lock queue */ - spin_lock_irqsave(&ctlr->queue_lock, flags); - - /* Make sure we are not already running a message */ - if (ctlr->cur_msg) - goto out; - - /* if there is anything in the list then exit */ - if (!list_empty(&ctlr->queue)) - goto out; - - /* if the controller is running then exit */ - if (ctlr->running) - goto out; - - /* if the controller is busy then exit */ - if (ctlr->busy) - goto out; - - /* if the controller is idling then exit - * this is actually a bit strange and would indicate that - * this function is scheduled twice, which should not happen - */ - if (ctlr->idling) - goto out; - - /* set up the initial states */ - ctlr->busy = false; - ctlr->idling = true; - spin_unlock_irqrestore(&ctlr->queue_lock, flags); - - /* free dummy receive buffers */ - kfree(ctlr->dummy_rx); - ctlr->dummy_rx = NULL; - kfree(ctlr->dummy_tx); - ctlr->dummy_tx = NULL; - - /* unprepare hardware */ - if (ctlr->unprepare_transfer_hardware && - ctlr->unprepare_transfer_hardware(ctlr)) - dev_err(&ctlr->dev, - "failed to unprepare transfer hardware\n"); - /* handle pm */ - if (ctlr->auto_runtime_pm) { - pm_runtime_mark_last_busy(ctlr->dev.parent); - pm_runtime_put_autosuspend(ctlr->dev.parent); - } - - /* mark controller as idle */ - trace_spi_controller_idle(ctlr); - - /* finally put us from idling into stopped */ - spin_lock_irqsave(&ctlr->queue_lock, flags); - ctlr->idling = false; - -out: - spin_unlock_irqrestore(&ctlr->queue_lock, flags); -} - static int spi_init_queue(struct spi_controller *ctlr) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -1421,8 +1374,7 @@ static int spi_init_queue(struct spi_controller *ctlr) return PTR_ERR(ctlr->kworker_task); } kthread_init_work(&ctlr->pump_messages, spi_pump_messages); - kthread_init_delayed_work(&ctlr->pump_idle_teardown, - spi_pump_idle_teardown); + /* * Controller config will indicate if this controller should run the * message pump with high (realtime) priority to reduce the transfer @@ -1494,16 +1446,7 @@ void spi_finalize_current_message(struct spi_controller *ctlr) spin_lock_irqsave(&ctlr->queue_lock, flags); ctlr->cur_msg = NULL; ctlr->cur_msg_prepared = false; - - /* if there is something queued, then wake the queue */ - if (!list_empty(&ctlr->queue)) - kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); - else - /* otherwise schedule delayed teardown */ - kthread_mod_delayed_work(&ctlr->kworker, - &ctlr->pump_idle_teardown, - HZ); - + kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); trace_spi_message_done(mesg); @@ -1608,7 +1551,7 @@ static int __spi_queued_transfer(struct spi_device *spi, msg->status = -EINPROGRESS; list_add_tail(&msg->queue, &ctlr->queue); - if (need_pump) + if (!ctlr->busy && need_pump) kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); @@ -3783,3 +3726,4 @@ err0: * include needing to have boardinfo data structures be much more public. */ postcore_initcall(spi_init); + diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 79ad62e2487c..916bba47d156 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -334,7 +334,6 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @kworker: thread struct for message pump * @kworker_task: pointer to task for message pump kworker thread * @pump_messages: work struct for scheduling work to the message pump - * @pump_idle_teardown: work structure for scheduling a teardown delayed * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue * @idling: the device is entering idle state @@ -533,7 +532,6 @@ struct spi_controller { struct kthread_worker kworker; struct task_struct *kworker_task; struct kthread_work pump_messages; - struct kthread_delayed_work pump_idle_teardown; spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; -- cgit v1.2.3 From 52875a04f4b26e7ef30a288ea096f7cfec0e93cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:20 -0800 Subject: bpf: verifier: remove dead code Instead of overwriting dead code with jmp -1 instructions remove it completely for root. Adjust verifier state and line info appropriately. v2: - adjust func_info (Alexei); - make sure first instruction retains line info (Alexei). v4: (Yonghong) - remove unnecessary if (!insn to remove) checks; - always keep last line info if first live instruction lacks one. v5: (Martin Lau) - improve and clarify comments. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 1 + kernel/bpf/core.c | 12 ++++ kernel/bpf/verifier.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 186 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..be9af6b4a9e4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -778,6 +778,7 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad08ba341197..2a81b8af3748 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -462,6 +462,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, return prog_adj; } +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) +{ + /* Branch offsets can't overflow when program is shrinking, no need + * to call bpf_adj_branches(..., true) here + */ + memmove(prog->insnsi + off, prog->insnsi + off + cnt, + sizeof(struct bpf_insn) * (prog->len - off - cnt)); + prog->len -= cnt; + + return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); +} + void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf1f98e8beb6..099b2541f87f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6432,6 +6432,150 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, + u32 off, u32 cnt) +{ + int i, j; + + /* find first prog starting at or after off (first to remove) */ + for (i = 0; i < env->subprog_cnt; i++) + if (env->subprog_info[i].start >= off) + break; + /* find first prog starting at or after off + cnt (first to stay) */ + for (j = i; j < env->subprog_cnt; j++) + if (env->subprog_info[j].start >= off + cnt) + break; + /* if j doesn't start exactly at off + cnt, we are just removing + * the front of previous prog + */ + if (env->subprog_info[j].start != off + cnt) + j--; + + if (j > i) { + struct bpf_prog_aux *aux = env->prog->aux; + int move; + + /* move fake 'exit' subprog as well */ + move = env->subprog_cnt + 1 - j; + + memmove(env->subprog_info + i, + env->subprog_info + j, + sizeof(*env->subprog_info) * move); + env->subprog_cnt -= j - i; + + /* remove func_info */ + if (aux->func_info) { + move = aux->func_info_cnt - j; + + memmove(aux->func_info + i, + aux->func_info + j, + sizeof(*aux->func_info) * move); + aux->func_info_cnt -= j - i; + /* func_info->insn_off is set after all code rewrites, + * in adjust_btf_func() - no need to adjust + */ + } + } else { + /* convert i from "first prog to remove" to "first to adjust" */ + if (env->subprog_info[i].start == off) + i++; + } + + /* update fake 'exit' subprog as well */ + for (; i <= env->subprog_cnt; i++) + env->subprog_info[i].start -= cnt; + + return 0; +} + +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, + u32 cnt) +{ + struct bpf_prog *prog = env->prog; + u32 i, l_off, l_cnt, nr_linfo; + struct bpf_line_info *linfo; + + nr_linfo = prog->aux->nr_linfo; + if (!nr_linfo) + return 0; + + linfo = prog->aux->linfo; + + /* find first line info to remove, count lines to be removed */ + for (i = 0; i < nr_linfo; i++) + if (linfo[i].insn_off >= off) + break; + + l_off = i; + l_cnt = 0; + for (; i < nr_linfo; i++) + if (linfo[i].insn_off < off + cnt) + l_cnt++; + else + break; + + /* First live insn doesn't match first live linfo, it needs to "inherit" + * last removed linfo. prog is already modified, so prog->len == off + * means no live instructions after (tail of the program was removed). + */ + if (prog->len != off && l_cnt && + (i == nr_linfo || linfo[i].insn_off != off + cnt)) { + l_cnt--; + linfo[--i].insn_off = off + cnt; + } + + /* remove the line info which refer to the removed instructions */ + if (l_cnt) { + memmove(linfo + l_off, linfo + i, + sizeof(*linfo) * (nr_linfo - i)); + + prog->aux->nr_linfo -= l_cnt; + nr_linfo = prog->aux->nr_linfo; + } + + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ + for (i = l_off; i < nr_linfo; i++) + linfo[i].insn_off -= cnt; + + /* fix up all subprogs (incl. 'exit') which start >= off */ + for (i = 0; i <= env->subprog_cnt; i++) + if (env->subprog_info[i].linfo_idx > l_off) { + /* program may have started in the removed region but + * may not be fully removed + */ + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) + env->subprog_info[i].linfo_idx -= l_cnt; + else + env->subprog_info[i].linfo_idx = l_off; + } + + return 0; +} + +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + unsigned int orig_prog_len = env->prog->len; + int err; + + err = bpf_remove_insns(env->prog, off, cnt); + if (err) + return err; + + err = adjust_subprog_starts_after_remove(env, off, cnt); + if (err) + return err; + + err = bpf_adj_linfo_after_remove(env, off, cnt); + if (err) + return err; + + memmove(aux_data + off, aux_data + off + cnt, + sizeof(*aux_data) * (orig_prog_len - off - cnt)); + + return 0; +} + /* The verifier does more data flow analysis than llvm and will not * explore branches that are dead at run time. Malicious programs can * have dead code too. Therefore replace all dead at-run-time code @@ -6492,6 +6636,30 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) } } +static int opt_remove_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + int j; + + j = 0; + while (i + j < insn_cnt && !aux_data[i + j].seen) + j++; + if (!j) + continue; + + err = verifier_remove_insns(env, i, j); + if (err) + return err; + insn_cnt = env->prog->len; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7282,11 +7450,13 @@ skip_full_check: if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); + if (ret == 0) + ret = opt_remove_dead_code(env); + } else { + if (ret == 0) + sanitize_dead_code(env); } - if (ret == 0) - sanitize_dead_code(env); - if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- cgit v1.2.3 From 9e4c24e7ee7dfd3898269519103e823892b730d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:23 -0800 Subject: bpf: verifier: record original instruction index The communication between the verifier and advanced JITs is based on instruction indexes. We have to keep them stable throughout the optimizations otherwise referring to a particular instruction gets messy quickly. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 573cca00a0e6..f3ae00ee5516 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + unsigned int orig_idx; /* original instruction index */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f39bca188a5c..f2c49b4235df 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7371,7 +7371,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, { struct bpf_verifier_env *env; struct bpf_verifier_log *log; - int ret = -EINVAL; + int i, len, ret = -EINVAL; bool is_priv; /* no program is valid */ @@ -7386,12 +7386,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, return -ENOMEM; log = &env->log; + len = (*prog)->len; env->insn_aux_data = - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), - (*prog)->len)); + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; + for (i = 0; i < len; i++) + env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; -- cgit v1.2.3 From 08ca90afba255d05dc3253caa44056e7aecbe8c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:24 -0800 Subject: bpf: notify offload JITs about optimizations Let offload JITs know when instructions are replaced and optimized out, so they can update their state appropriately. The optimizations are best effort, if JIT returns an error from any callback verifier will stop notifying it as state may now be out of sync, but the verifier continues making progress. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_verifier.h | 5 +++++ kernel/bpf/offload.c | 35 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 ++++++ 4 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e734f163bd0b..3851529062ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,9 +268,15 @@ struct bpf_verifier_ops { }; struct bpf_prog_offload_ops { + /* verifier basic callbacks */ int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); + /* verifier optimization callbacks (called after .finalize) */ + int (*replace_insn)(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); + int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); + /* program management callbacks */ int (*prepare)(struct bpf_prog *prog); int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); @@ -283,6 +289,7 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; + bool opt_failed; void *jited_image; u32 jited_len; }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f3ae00ee5516..0620e418dde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -266,5 +266,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 54cf2b9c44a4..39dba8c90331 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) return ret; } +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + const struct bpf_prog_offload_ops *ops; + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + ops = offload->offdev->ops; + if (!offload->opt_failed && ops->replace_insn) + ret = ops->replace_insn(env, off, insn); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (!offload->opt_failed && offload->offdev->ops->remove_insns) + ret = offload->offdev->ops->remove_insns(env, off, cnt); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2c49b4235df..8cfe39ef770f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6558,6 +6558,9 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_remove_insns(env, off, cnt); + err = bpf_remove_insns(env->prog, off, cnt); if (err) return err; @@ -6632,6 +6635,9 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_replace_insn(env, i, &ja); + memcpy(insn, &ja, sizeof(ja)); } } -- cgit v1.2.3 From 643fa9612bf1a29153eee46fd398117632f93cbe Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Wed, 12 Dec 2018 15:20:12 +0530 Subject: fscrypt: remove filesystem specific build config option In order to have a common code base for fscrypt "post read" processing for all filesystems which support encryption, this commit removes filesystem specific build config option (e.g. CONFIG_EXT4_FS_ENCRYPTION) and replaces it with a build option (i.e. CONFIG_FS_ENCRYPTION) whose value affects all the filesystems making use of fscrypt. Reviewed-by: Eric Biggers Signed-off-by: Chandan Rajendra Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 4 +- arch/mips/configs/generic_defconfig | 2 +- arch/nds32/configs/defconfig | 2 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- fs/crypto/Kconfig | 5 +- fs/crypto/fscrypt_private.h | 1 - fs/ext4/Kconfig | 15 -- fs/ext4/dir.c | 2 - fs/ext4/ext4.h | 7 +- fs/ext4/inode.c | 8 +- fs/ext4/ioctl.c | 4 +- fs/ext4/namei.c | 10 +- fs/ext4/page-io.c | 6 +- fs/ext4/readpage.c | 2 +- fs/ext4/super.c | 6 +- fs/ext4/sysfs.c | 4 +- fs/f2fs/Kconfig | 12 +- fs/f2fs/f2fs.h | 7 +- fs/f2fs/super.c | 8 +- fs/f2fs/sysfs.c | 4 +- fs/ubifs/Kconfig | 12 +- fs/ubifs/Makefile | 2 +- fs/ubifs/ioctl.c | 4 +- fs/ubifs/sb.c | 2 +- fs/ubifs/super.c | 2 +- fs/ubifs/ubifs.h | 5 +- include/linux/fs.h | 4 +- include/linux/fscrypt.h | 416 +++++++++++++++++++++++++++++++- include/linux/fscrypt_notsupp.h | 231 ------------------ include/linux/fscrypt_supp.h | 204 ---------------- 31 files changed, 460 insertions(+), 535 deletions(-) delete mode 100644 include/linux/fscrypt_notsupp.h delete mode 100644 include/linux/fscrypt_supp.h (limited to 'include/linux') diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 3a7b60521b94..43dd989e2a3f 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -343,9 +343,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem, or the filesystem superblock has not + support for filesystems, or the filesystem superblock has not had encryption enabled on it. (For example, to use encryption on an - ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the + ext4 filesystem, CONFIG_FS_ENCRYPTION must be enabled in the kernel config, and the superblock must have had the "encrypt" feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O encrypt``.) diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig index 7c138dab87df..5d80521e5d5a 100644 --- a/arch/mips/configs/generic_defconfig +++ b/arch/mips/configs/generic_defconfig @@ -59,7 +59,7 @@ CONFIG_HID_MONTEREY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FUSE_FS=y CONFIG_CUSE=y diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig index 2546d8770785..65ce9259081b 100644 --- a/arch/nds32/configs/defconfig +++ b/arch/nds32/configs/defconfig @@ -74,7 +74,7 @@ CONFIG_GENERIC_PHY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y +CONFIG_FS_ENCRYPTION=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c69cb04b7a59..9824c7bad9d4 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -500,7 +500,6 @@ CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -520,6 +519,7 @@ CONFIG_BTRFS_DEBUG=y CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 32f539dc9c19..4fcbe5792744 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -497,7 +497,6 @@ CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -515,6 +514,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 284b589b4774..f0de238000c0 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,5 +1,5 @@ config FS_ENCRYPTION - tristate "FS Encryption (Per-file encryption)" + bool "FS Encryption (Per-file encryption)" select CRYPTO select CRYPTO_AES select CRYPTO_CBC @@ -12,4 +12,5 @@ config FS_ENCRYPTION Enable encryption of files and directories. This feature is similar to ecryptfs, but it is more memory efficient since it avoids caching the encrypted and - decrypted pages in the page cache. + decrypted pages in the page cache. Currently Ext4, + F2FS and UBIFS make use of this feature. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7424f851eb5c..7da276159593 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,7 +12,6 @@ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H -#define __FS_HAS_ENCRYPTION 1 #include #include diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index a453cc87082b..031e5a82d556 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -96,21 +96,6 @@ config EXT4_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4_ENCRYPTION - bool "Ext4 Encryption" - depends on EXT4_FS - select FS_ENCRYPTION - help - Enable encryption of ext4 files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - -config EXT4_FS_ENCRYPTION - bool - default y - depends on EXT4_ENCRYPTION - config EXT4_DEBUG bool "EXT4 debugging support" depends on EXT4_FS diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index fb7a64ea5679..0ccd51f72048 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -283,9 +283,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) done: err = 0; errout: -#ifdef CONFIG_EXT4_FS_ENCRYPTION fscrypt_fname_free_buffer(&fstr); -#endif brelse(bh); return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index afdb9ad8be0e..5012ddb6daf9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -40,7 +40,6 @@ #include #endif -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION) #include #include @@ -1326,7 +1325,7 @@ struct ext4_super_block { #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ #define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004 -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \ EXT4_MF_TEST_DUMMY_ENCRYPTION)) #else @@ -2051,7 +2050,7 @@ struct ext4_filename { const struct qstr *usr_fname; struct fscrypt_str disk_name; struct dx_hash_info hinfo; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif }; @@ -2279,7 +2278,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static inline int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct ext4_filename *fname) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71bd2d28f58d..4356ef6d728e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1150,7 +1150,7 @@ int do_journal_get_write_access(handle_t *handle, return ret; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { @@ -1302,7 +1302,7 @@ retry_journal: /* In case writeback began while the page was unlocked */ wait_for_stable_page(page); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (ext4_should_dioread_nolock(inode)) ret = ext4_block_write_begin(page, pos, len, ext4_get_block_unwritten); @@ -3104,7 +3104,7 @@ retry_journal: /* In case writeback began while the page was unlocked */ wait_for_stable_page(page); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ret = ext4_block_write_begin(page, pos, len, ext4_da_get_block_prep); #else @@ -3879,7 +3879,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t offset = iocb->ki_pos; ssize_t ret; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) return 0; #endif diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d37dafa1d133..d26bcac291bb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -210,7 +210,7 @@ journal_err_out: return err; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int uuid_is_zero(__u8 u[16]) { int i; @@ -978,7 +978,7 @@ resizefs_out: return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); case EXT4_IOC_GET_ENCRYPTION_PWSALT: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION int err, err2; struct ext4_sb_info *sbi = EXT4_SB(sb); handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index be6cb69beb12..980166a8122a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -612,7 +612,7 @@ static struct stats dx_show_leaf(struct inode *dir, { if (show_names) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION int len; char *name; struct fscrypt_str fname_crypto_str = @@ -984,7 +984,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0)); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION /* Check if the directory is encrypted */ if (IS_ENCRYPTED(dir)) { err = fscrypt_get_encryption_info(dir); @@ -1047,7 +1047,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, } errout: brelse(bh); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION fscrypt_fname_free_buffer(&fname_crypto_str); #endif return count; @@ -1267,7 +1267,7 @@ static inline bool ext4_match(const struct ext4_filename *fname, f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION f.crypto_buf = fname->crypto_buf; #endif return fscrypt_match_name(&f, de->name, de->name_len); @@ -1498,7 +1498,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, ext4_lblk_t block; int retval; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION *res_dir = NULL; #endif frame = dx_probe(fname, dir, NULL, frames); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index c398b55da854..b9d6cabe2ea8 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,7 +66,7 @@ static void ext4_finish_bio(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct page *data_page = NULL; #endif struct buffer_head *bh, *head; @@ -78,7 +78,7 @@ static void ext4_finish_bio(struct bio *bio) if (!page) continue; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (!page->mapping) { /* The bounce data pages are unmapped. */ data_page = page; @@ -111,7 +111,7 @@ static void ext4_finish_bio(struct bio *bio) bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); if (!under_io) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (data_page) fscrypt_restore_control_page(data_page); #endif diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 52d3ff5a9db1..b18881eb8da6 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -49,7 +49,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION return unlikely(bio->bi_private != NULL); #else return false; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb12d3c17c1b..60da0a6e4d86 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1232,7 +1232,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return try_to_free_buffers(page); } -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int ext4_get_context(struct inode *inode, void *ctx, size_t len) { return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, @@ -1922,7 +1922,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); } else if (token == Opt_test_dummy_encryption) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION; ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); @@ -4167,7 +4167,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; #endif #ifdef CONFIG_QUOTA diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9212a026a1f1..5e4e78fc0b3a 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -224,7 +224,7 @@ static struct attribute *ext4_attrs[] = { EXT4_ATTR_FEATURE(lazy_itable_init); EXT4_ATTR_FEATURE(batched_discard); EXT4_ATTR_FEATURE(meta_bg_resize); -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION EXT4_ATTR_FEATURE(encryption); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); @@ -233,7 +233,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), ATTR_LIST(meta_bg_resize), -#ifdef CONFIG_EXT4_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), #endif ATTR_LIST(metadata_csum_seed), diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 9a20ef42fadd..e57cc754d543 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -3,6 +3,7 @@ config F2FS_FS depends on BLOCK select CRYPTO select CRYPTO_CRC32 + select F2FS_FS_XATTR if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on @@ -70,17 +71,6 @@ config F2FS_CHECK_FS If you want to improve the performance, say N. -config F2FS_FS_ENCRYPTION - bool "F2FS Encryption" - depends on F2FS_FS - depends on F2FS_FS_XATTR - select FS_ENCRYPTION - help - Enable encryption of f2fs files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - config F2FS_IO_TRACE bool "F2FS IO tracer" depends on F2FS_FS diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9ef6f38e51cc..95cc885ccb2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,6 @@ #include #include -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION) #include #ifdef CONFIG_F2FS_CHECK_FS @@ -1137,7 +1136,7 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) #else @@ -3470,7 +3469,7 @@ static inline bool f2fs_encrypted_file(struct inode *inode) static inline void f2fs_set_encrypted_inode(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION file_set_encrypt(inode); f2fs_set_inode_flags(inode); #endif @@ -3549,7 +3548,7 @@ static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) static inline bool f2fs_may_encrypt(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION umode_t mode = inode->i_mode; return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c46a1d4318d4..0f3db3a8e5cb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -757,7 +757,7 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); break; case Opt_test_dummy_encryption: -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (!f2fs_sb_has_encrypt(sbi)) { f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); return -EINVAL; @@ -1390,7 +1390,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",whint_mode=%s", "user-based"); else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION if (F2FS_OPTION(sbi).test_dummy_encryption) seq_puts(seq, ",test_dummy_encryption"); #endif @@ -2154,7 +2154,7 @@ static const struct super_operations f2fs_sops = { .remount_fs = f2fs_remount, }; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) { return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, @@ -3116,7 +3116,7 @@ try_onemore: #endif sb->s_op = &f2fs_sops; -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; #endif sb->s_xattr = f2fs_xattr_handlers; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0575edbe3ed6..70da6801c86f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -431,7 +431,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); #endif #ifdef CONFIG_BLK_DEV_ZONED @@ -492,7 +492,7 @@ static struct attribute *f2fs_attrs[] = { }; static struct attribute *f2fs_feat_attrs[] = { -#ifdef CONFIG_F2FS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), #endif #ifdef CONFIG_BLK_DEV_ZONED diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index bc1e082d921d..9da2f135121b 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -8,6 +8,7 @@ config UBIFS_FS select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB select CRYPTO_HASH_INFO + select UBIFS_FS_XATTR if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. @@ -60,17 +61,6 @@ config UBIFS_FS_XATTR If unsure, say Y. -config UBIFS_FS_ENCRYPTION - bool "UBIFS Encryption" - depends on UBIFS_FS_XATTR && BLOCK - select FS_ENCRYPTION - default n - help - Enable encryption of UBIFS files and directories. This - feature is similar to ecryptfs, but it is more memory - efficient since it avoids caching the encrypted and - decrypted pages in the page cache. - config UBIFS_FS_SECURITY bool "UBIFS Security Labels" depends on UBIFS_FS_XATTR diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile index 5f838319c8d5..5c4b845754a7 100644 --- a/fs/ubifs/Makefile +++ b/fs/ubifs/Makefile @@ -6,6 +6,6 @@ ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o ubifs-y += misc.o -ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o +ubifs-$(CONFIG_FS_ENCRYPTION) += crypto.o ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 0164bcc827f8..0f9c362a3402 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -185,7 +185,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case FS_IOC_SET_ENCRYPTION_POLICY: { -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION struct ubifs_info *c = inode->i_sb->s_fs_info; err = ubifs_enable_encryption(c); @@ -198,7 +198,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif } case FS_IOC_GET_ENCRYPTION_POLICY: { -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION return fscrypt_ioctl_get_policy(file, (void __user *)arg); #else return -EOPNOTSUPP; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 3da90c951c23..67fac1e8adfb 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -748,7 +748,7 @@ int ubifs_read_superblock(struct ubifs_info *c) goto out; } -#ifndef CONFIG_UBIFS_FS_ENCRYPTION +#ifndef CONFIG_FS_ENCRYPTION if (c->encrypted) { ubifs_err(c, "file system contains encrypted files but UBIFS" " was built without crypto support."); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fac1133dadd..8dc2818fdd84 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2146,7 +2146,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_UBIFS_FS_XATTR sb->s_xattr = ubifs_xattr_handlers; #endif -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ubifs_crypt_operations; #endif diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 38401adaa00d..1ae12900e01d 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -43,7 +43,6 @@ #include #include -#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION) #include #include "ubifs-media.h" @@ -142,7 +141,7 @@ */ #define WORST_COMPR_FACTOR 2 -#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#ifdef CONFIG_FS_ENCRYPTION #define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE #else #define UBIFS_CIPHER_BLOCK_SIZE 0 @@ -2072,7 +2071,7 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len, #include "misc.h" #include "key.h" -#ifndef CONFIG_UBIFS_FS_ENCRYPTION +#ifndef CONFIG_FS_ENCRYPTION static inline int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..ba7889bb9ef6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -698,7 +698,7 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +#ifdef CONFIG_FS_ENCRYPTION struct fscrypt_info *i_crypt_info; #endif @@ -1403,7 +1403,7 @@ struct super_block { void *s_security; #endif const struct xattr_handler **s_xattr; -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +#ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 952ab97af325..eec604840568 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -2,9 +2,8 @@ /* * fscrypt.h: declarations for per-file encryption * - * Filesystems that implement per-file encryption include this header - * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem - * is being built with encryption support or not. + * Filesystems that implement per-file encryption must include this header + * file. * * Copyright (C) 2015, Google, Inc. * @@ -15,6 +14,8 @@ #define _LINUX_FSCRYPT_H #include +#include +#include #define FS_CRYPTO_BLOCK_SIZE 16 @@ -42,11 +43,410 @@ struct fscrypt_name { /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 -#if __FS_HAS_ENCRYPTION -#include -#else -#include -#endif +#ifdef CONFIG_FS_ENCRYPTION +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned int max_namelen; +}; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} + +/* crypto.c */ +extern void fscrypt_enqueue_decrypt_work(struct work_struct *); +extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); +extern void fscrypt_release_ctx(struct fscrypt_ctx *); +extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, + unsigned int, unsigned int, + u64, gfp_t); +extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, + unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + +extern void fscrypt_restore_control_page(struct page *); + +/* policy.c */ +extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); +extern int fscrypt_ioctl_get_policy(struct file *, void __user *); +extern int fscrypt_has_permitted_context(struct inode *, struct inode *); +extern int fscrypt_inherit_context(struct inode *, struct inode *, + void *, bool); +/* keyinfo.c */ +extern int fscrypt_get_encryption_info(struct inode *); +extern void fscrypt_put_encryption_info(struct inode *); + +/* fname.c */ +extern int fscrypt_setup_filename(struct inode *, const struct qstr *, + int lookup, struct fscrypt_name *); + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + kfree(fname->crypto_buf.name); +} + +extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, + struct fscrypt_str *); +extern void fscrypt_fname_free_buffer(struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); + +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. + * + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. + */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +extern void fscrypt_decrypt_bio(struct bio *); +extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, + unsigned int); + +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); +extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done); +#else /* !CONFIG_FS_ENCRYPTION */ + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} + +/* crypto.c */ +static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ +} + +static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, + gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) +{ + return; +} + +static inline struct page *fscrypt_encrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int fscrypt_decrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return -EOPNOTSUPP; +} + +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} + +static inline void fscrypt_restore_control_page(struct page *page) +{ + return; +} + +/* policy.c */ +static inline int fscrypt_ioctl_set_policy(struct file *filp, + const void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_has_permitted_context(struct inode *parent, + struct inode *child) +{ + return 0; +} + +static inline int fscrypt_inherit_context(struct inode *parent, + struct inode *child, + void *fs_data, bool preload) +{ + return -EOPNOTSUPP; +} + +/* keyinfo.c */ +static inline int fscrypt_get_encryption_info(struct inode *inode) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_put_encryption_info(struct inode *inode) +{ + return; +} + + /* fname.c */ +static inline int fscrypt_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct fscrypt_name *fname) +{ + if (IS_ENCRYPTED(dir)) + return -EOPNOTSUPP; + + memset(fname, 0, sizeof(struct fscrypt_name)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + return; +} + +static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) +{ + return; +} + +static inline int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + /* Encryption support disabled; use standard comparison */ + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + +/* bio.c */ +static inline void fscrypt_decrypt_bio(struct bio *bio) +{ +} + +static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio) +{ +} + +static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + return; +} + +static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + return -EOPNOTSUPP; +} + +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline const char *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* !CONFIG_FS_ENCRYPTION */ /** * fscrypt_require_key - require an inode's encryption key diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h deleted file mode 100644 index ee8b43e4c15a..000000000000 --- a/include/linux/fscrypt_notsupp.h +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fscrypt_notsupp.h - * - * This stubs out the fscrypt functions for filesystems configured without - * encryption support. - * - * Do not include this file directly. Use fscrypt.h instead! - */ -#ifndef _LINUX_FSCRYPT_H -#error "Incorrect include of linux/fscrypt_notsupp.h!" -#endif - -#ifndef _LINUX_FSCRYPT_NOTSUPP_H -#define _LINUX_FSCRYPT_NOTSUPP_H - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return false; -} - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - return false; -} - -/* crypto.c */ -static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) -{ -} - -static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, - gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - return; -} - -static inline struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline int fscrypt_decrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, unsigned int offs, - u64 lblk_num) -{ - return -EOPNOTSUPP; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -} - -static inline void fscrypt_restore_control_page(struct page *page) -{ - return; -} - -/* policy.c */ -static inline int fscrypt_ioctl_set_policy(struct file *filp, - const void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_has_permitted_context(struct inode *parent, - struct inode *child) -{ - return 0; -} - -static inline int fscrypt_inherit_context(struct inode *parent, - struct inode *child, - void *fs_data, bool preload) -{ - return -EOPNOTSUPP; -} - -/* keyinfo.c */ -static inline int fscrypt_get_encryption_info(struct inode *inode) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_put_encryption_info(struct inode *inode) -{ - return; -} - - /* fname.c */ -static inline int fscrypt_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct fscrypt_name *fname) -{ - if (IS_ENCRYPTED(dir)) - return -EOPNOTSUPP; - - memset(fname, 0, sizeof(struct fscrypt_name)); - fname->usr_fname = iname; - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; -} - -static inline void fscrypt_free_filename(struct fscrypt_name *fname) -{ - return; -} - -static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 max_encrypted_len, - struct fscrypt_str *crypto_str) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) -{ - return; -} - -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - /* Encryption support disabled; use standard comparison */ - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - -/* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) -{ -} - -static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio) -{ -} - -static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - return; -} - -static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) -{ - return -EOPNOTSUPP; -} - -/* hooks.c */ - -static inline int fscrypt_file_open(struct inode *inode, struct file *filp) -{ - if (IS_ENCRYPTED(inode)) - return -EOPNOTSUPP; - return 0; -} - -static inline int __fscrypt_prepare_link(struct inode *inode, - struct inode *dir) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_rename(struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry, - unsigned int flags) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_lookup(struct inode *dir, - struct dentry *dentry) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_prepare_symlink(struct inode *dir, - unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link) -{ - return -EOPNOTSUPP; -} - -static inline int __fscrypt_encrypt_symlink(struct inode *inode, - const char *target, - unsigned int len, - struct fscrypt_str *disk_link) -{ - return -EOPNOTSUPP; -} - -static inline const char *fscrypt_get_symlink(struct inode *inode, - const void *caddr, - unsigned int max_size, - struct delayed_call *done) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -#endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h deleted file mode 100644 index 6456c6b2005f..000000000000 --- a/include/linux/fscrypt_supp.h +++ /dev/null @@ -1,204 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fscrypt_supp.h - * - * Do not include this file directly. Use fscrypt.h instead! - */ -#ifndef _LINUX_FSCRYPT_H -#error "Incorrect include of linux/fscrypt_supp.h!" -#endif - -#ifndef _LINUX_FSCRYPT_SUPP_H -#define _LINUX_FSCRYPT_SUPP_H - -#include -#include - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto operations for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned int max_namelen; -}; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return (inode->i_crypt_info != NULL); -} - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - return inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode); -} - -/* crypto.c */ -extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, - unsigned int, unsigned int, - u64, gfp_t); -extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, - unsigned int, u64); - -static inline struct page *fscrypt_control_page(struct page *page) -{ - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -} - -extern void fscrypt_restore_control_page(struct page *); - -/* policy.c */ -extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); -extern int fscrypt_ioctl_get_policy(struct file *, void __user *); -extern int fscrypt_has_permitted_context(struct inode *, struct inode *); -extern int fscrypt_inherit_context(struct inode *, struct inode *, - void *, bool); -/* keyinfo.c */ -extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *); - -/* fname.c */ -extern int fscrypt_setup_filename(struct inode *, const struct qstr *, - int lookup, struct fscrypt_name *); - -static inline void fscrypt_free_filename(struct fscrypt_name *fname) -{ - kfree(fname->crypto_buf.name); -} - -extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, - struct fscrypt_str *); -extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. - * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. - */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - -/* bio.c */ -extern void fscrypt_decrypt_bio(struct bio *); -extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio); -extern void fscrypt_pullback_bio_page(struct page **, bool); -extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, - unsigned int); - -/* hooks.c */ -extern int fscrypt_file_open(struct inode *inode, struct file *filp); -extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); -extern int __fscrypt_prepare_rename(struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry, - unsigned int flags); -extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); -extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link); -extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, - unsigned int len, - struct fscrypt_str *disk_link); -extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, - unsigned int max_size, - struct delayed_call *done); - -#endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit v1.2.3 From f5e55e777cc93eae1416f0fa4908e8846b6d7825 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Jan 2019 16:20:21 -0800 Subject: fscrypt: return -EXDEV for incompatible rename or link into encrypted dir Currently, trying to rename or link a regular file, directory, or symlink into an encrypted directory fails with EPERM when the source file is unencrypted or is encrypted with a different encryption policy, and is on the same mountpoint. It is correct for the operation to fail, but the choice of EPERM breaks tools like 'mv' that know to copy rather than rename if they see EXDEV, but don't know what to do with EPERM. Our original motivation for EPERM was to encourage users to securely handle their data. Encrypting files by "moving" them into an encrypted directory can be insecure because the unencrypted data may remain in free space on disk, where it can later be recovered by an attacker. It's much better to encrypt the data from the start, or at least try to securely delete the source data e.g. using the 'shred' program. However, the current behavior hasn't been effective at achieving its goal because users tend to be confused, hack around it, and complain; see e.g. https://github.com/google/fscrypt/issues/76. And in some cases it's actually inconsistent or unnecessary. For example, 'mv'-ing files between differently encrypted directories doesn't work even in cases where it can be secure, such as when in userspace the same passphrase protects both directories. Yet, you *can* already 'mv' unencrypted files into an encrypted directory if the source files are on a different mountpoint, even though doing so is often insecure. There are probably better ways to teach users to securely handle their files. For example, the 'fscrypt' userspace tool could provide a command that migrates unencrypted files into an encrypted directory, acting like 'shred' on the source files and providing appropriate warnings depending on the type of the source filesystem and disk. Receiving errors on unimportant files might also force some users to disable encryption, thus making the behavior counterproductive. It's desirable to make encryption as unobtrusive as possible. Therefore, change the error code from EPERM to EXDEV so that tools looking for EXDEV will fall back to a copy. This, of course, doesn't prevent users from still doing the right things to securely manage their files. Note that this also matches the behavior when a file is renamed between two project quota hierarchies; so there's precedent for using EXDEV for things other than mountpoints. xfstests generic/398 will require an update with this change. [Rewritten from an earlier patch series by Michael Halcrow.] Cc: Michael Halcrow Cc: Joe Richey Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 12 ++++++++++-- fs/crypto/hooks.c | 6 +++--- fs/crypto/policy.c | 3 +-- include/linux/fscrypt.h | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 43dd989e2a3f..08c23b60e016 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -451,10 +451,18 @@ astute users may notice some differences in behavior: - Unencrypted files, or files encrypted with a different encryption policy (i.e. different key, modes, or flags), cannot be renamed or linked into an encrypted directory; see `Encryption policy - enforcement`_. Attempts to do so will fail with EPERM. However, + enforcement`_. Attempts to do so will fail with EXDEV. However, encrypted files can be renamed within an encrypted directory, or into an unencrypted directory. + Note: "moving" an unencrypted file into an encrypted directory, e.g. + with the `mv` program, is implemented in userspace by a copy + followed by a delete. Be aware that the original unencrypted data + may remain recoverable from free space on the disk; prefer to keep + all files encrypted from the very beginning. The `shred` program + may be used to overwrite the source files but isn't guaranteed to be + effective on all filesystems and storage devices. + - Direct I/O is not supported on encrypted files. Attempts to use direct I/O on such files will fall back to buffered I/O. @@ -541,7 +549,7 @@ not be encrypted. Except for those special files, it is forbidden to have unencrypted files, or files encrypted with a different encryption policy, in an encrypted directory tree. Attempts to link or rename such a file into -an encrypted directory will fail with EPERM. This is also enforced +an encrypted directory will fail with EXDEV. This is also enforced during ->lookup() to provide limited protection against offline attacks that try to disable or downgrade encryption in known locations where applications may later write sensitive data. It is recommended diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 926e5df20ec3..56debb1fcf5e 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -58,7 +58,7 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) return err; if (!fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + return -EXDEV; return 0; } @@ -82,13 +82,13 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, d_inode(old_dentry))) - return -EPERM; + return -EXDEV; if ((flags & RENAME_EXCHANGE) && IS_ENCRYPTED(old_dir) && !fscrypt_has_permitted_context(old_dir, d_inode(new_dentry))) - return -EPERM; + return -EXDEV; } return 0; } diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index f490de921ce8..bd7eaf9b3f00 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -151,8 +151,7 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy); * malicious offline violations of this constraint, while the link and rename * checks are needed to prevent online violations of this constraint. * - * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail - * the filesystem operation with EPERM. + * Return: 1 if permitted, 0 if forbidden. */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index eec604840568..e5194fc3983e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -489,7 +489,7 @@ static inline int fscrypt_require_key(struct inode *inode) * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, - * -EPERM if the link would result in an inconsistent encryption policy, or + * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, @@ -519,7 +519,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * - * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, -- cgit v1.2.3 From e355477ed9e4f401e3931043df97325d38552d54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Jan 2019 16:33:10 -0800 Subject: net/mlx5: Make mlx5_cmd_exec_cb() a safe API APIs that have deferred callbacks should have some kind of cleanup function that callers can use to fence the callbacks. Otherwise things like module unloading can lead to dangling function pointers, or worse. The IB MR code is the only place that calls this function and had a really poor attempt at creating this fence. Provide a good version in the core code as future patches will add more places that need this fence. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/mr.c | 39 ++++--------------- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 ++++++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 11 +++--- include/linux/mlx5/driver.h | 32 +++++++++++++--- 5 files changed, 91 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efe383c0ac86..eedba0d2ec4b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { @@ -944,6 +945,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..bf2b6ea23851 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..a25a8c6f938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4e444863054a..039c9398614c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -850,11 +850,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +904,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); -- cgit v1.2.3 From ef74f70e5a10cc2a78cc5529e564170cabcda9af Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 19 Jan 2019 15:42:42 -0500 Subject: gpio: add irq domain activate/deactivate functions This adds the two new functions gpiochip_irq_domain_activate and gpiochip_irq_domain_deactivate that can be used as the activate and deactivate functions in the struct irq_domain_ops. This is for situations where only gpiochip_{lock,unlock}_as_irq needs to be called. SPMI and SSBI GPIO are two users that will initially use these functions. Signed-off-by: Brian Masney Suggested-by: Stephen Boyd Reviewed-by: Stephen Boyd Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/gpio/driver.h | 5 +++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1651d7f0a303..361a09c8138a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1775,6 +1775,43 @@ static const struct irq_domain_ops gpiochip_domain_ops = { .xlate = irq_domain_xlate_twocell, }; +/** + * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ + * @domain: The IRQ domain used by this IRQ chip + * @data: Outermost irq_data associated with the IRQ + * @reserve: If set, only reserve an interrupt vector instead of assigning one + * + * This function is a wrapper that calls gpiochip_lock_as_irq() and is to be + * used as the activate function for the &struct irq_domain_ops. The host_data + * for the IRQ domain must be the &struct gpio_chip. + */ +int gpiochip_irq_domain_activate(struct irq_domain *domain, + struct irq_data *data, bool reserve) +{ + struct gpio_chip *chip = domain->host_data; + + return gpiochip_lock_as_irq(chip, data->hwirq); +} +EXPORT_SYMBOL_GPL(gpiochip_irq_domain_activate); + +/** + * gpiochip_irq_domain_deactivate() - Unlock a GPIO used as an IRQ + * @domain: The IRQ domain used by this IRQ chip + * @data: Outermost irq_data associated with the IRQ + * + * This function is a wrapper that will call gpiochip_unlock_as_irq() and is to + * be used as the deactivate function for the &struct irq_domain_ops. The + * host_data for the IRQ domain must be the &struct gpio_chip. + */ +void gpiochip_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *data) +{ + struct gpio_chip *chip = domain->host_data; + + return gpiochip_unlock_as_irq(chip, data->hwirq); +} +EXPORT_SYMBOL_GPL(gpiochip_irq_domain_deactivate); + static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) { if (!gpiochip_irqchip_irq_valid(chip, offset)) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 07cddbf45186..01497910f023 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -472,6 +472,11 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq); void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); +int gpiochip_irq_domain_activate(struct irq_domain *domain, + struct irq_data *data, bool reserve); +void gpiochip_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *data); + void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int parent_irq, -- cgit v1.2.3 From 434a4315b9617bf1742bc64712bf44a208502f7f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 23 Jan 2019 07:31:58 +0100 Subject: net: phy: change phy_start_interrupts to phy_request_interrupt Now that we enable the interrupts in phy_start() we don't have to do it before. Therefore remove enabling interrupts from phy_start_interrupts() and rename this function to reflect the changed functionality. v2: - improve warning to clearly state that we fall back to polling Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 23 +++++++++++------------ drivers/net/phy/phy_device.c | 4 ++-- drivers/net/phy/phylink.c | 4 ++-- include/linux/phy.h | 2 +- 4 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 079b6a617fc8..d12aa512b7f5 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -785,28 +785,27 @@ static int phy_enable_interrupts(struct phy_device *phydev) } /** - * phy_start_interrupts - request and enable interrupts for a PHY device + * phy_request_interrupt - request interrupt for a PHY device * @phydev: target phy_device struct * * Description: Request the interrupt for the given PHY. * If this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. * This should only be called with a valid IRQ number. - * Returns 0 on success or < 0 on error. */ -int phy_start_interrupts(struct phy_device *phydev) +void phy_request_interrupt(struct phy_device *phydev) { - if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, - IRQF_ONESHOT | IRQF_SHARED, - phydev_name(phydev), phydev) < 0) { - phydev_warn(phydev, "Can't get IRQ %d\n", phydev->irq); + int err; + + err = request_threaded_irq(phydev->irq, NULL, phy_interrupt, + IRQF_ONESHOT | IRQF_SHARED, + phydev_name(phydev), phydev); + if (err) { + phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n", + err, phydev->irq); phydev->irq = PHY_POLL; - return 0; } - - return phy_enable_interrupts(phydev); } -EXPORT_SYMBOL(phy_start_interrupts); +EXPORT_SYMBOL(phy_request_interrupt); /** * phy_stop - Bring down the PHY link, and stop checking the status diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 64c25a0684ac..891e0178b97f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -951,8 +951,8 @@ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, return rc; phy_prepare_link(phydev, handler); - if (phydev->irq > 0) - phy_start_interrupts(phydev); + if (phy_interrupt_is_valid(phydev)) + phy_request_interrupt(phydev); return 0; } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c1b6e05ba60c..2e21ce42e388 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -676,8 +676,8 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising); - if (phy->irq > 0) - phy_start_interrupts(phy); + if (phy_interrupt_is_valid(phy)) + phy_request_interrupt(phy); return 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 1f3873a2ff29..70f83d0d7469 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1047,7 +1047,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); -int phy_start_interrupts(struct phy_device *phydev); +void phy_request_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); -- cgit v1.2.3 From 231baecdef7a906579925ccf1bd45aa734f32320 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 18 Jan 2019 22:48:00 -0800 Subject: crypto: clarify name of WEAK_KEY request flag CRYPTO_TFM_REQ_WEAK_KEY confuses newcomers to the crypto API because it sounds like it is requesting a weak key. Actually, it is requesting that weak keys be forbidden (for algorithms that have the notion of "weak keys"; currently only DES and XTS do). Also it is only one letter away from CRYPTO_TFM_RES_WEAK_KEY, with which it can be easily confused. (This in fact happened in the UX500 driver, though just in some debugging messages.) Therefore, make the intent clear by renaming it to CRYPTO_TFM_REQ_FORBID_WEAK_KEYS. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 4 ++-- arch/sparc/crypto/des_glue.c | 4 ++-- crypto/des_generic.c | 4 ++-- crypto/testmgr.c | 14 +++++++------- crypto/testmgr.h | 4 ++-- drivers/crypto/atmel-tdes.c | 2 +- drivers/crypto/bcm/cipher.c | 4 ++-- drivers/crypto/ccp/ccp-crypto-des3.c | 2 +- drivers/crypto/ccree/cc_cipher.c | 3 ++- drivers/crypto/hifn_795x.c | 3 ++- drivers/crypto/inside-secure/safexcel_cipher.c | 2 +- drivers/crypto/ixp4xx_crypto.c | 4 ++-- drivers/crypto/marvell/cipher.c | 2 +- drivers/crypto/n2_core.c | 2 +- drivers/crypto/omap-des.c | 2 +- drivers/crypto/picoxcell_crypto.c | 3 ++- drivers/crypto/qce/ablkcipher.c | 4 ++-- drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c | 2 +- drivers/crypto/sunxi-ss/sun4i-ss-cipher.c | 2 +- drivers/crypto/talitos.c | 2 +- drivers/crypto/ux500/cryp/cryp_core.c | 20 +++++++++++--------- fs/crypto/keyinfo.c | 4 ++-- fs/ecryptfs/crypto.c | 5 +++-- include/crypto/xts.h | 4 ++-- include/linux/crypto.h | 2 +- 25 files changed, 55 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 5346b5a80bb6..0d15383d0ff1 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -38,7 +38,7 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, /* check for weak keys */ if (!des_ekey(tmp, key) && - (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } @@ -228,7 +228,7 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], DES_KEY_SIZE)) && - (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 56499ea39fd3..4884315daff4 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -53,7 +53,7 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key, * weak key detection code. */ ret = des_ekey(tmp, key); - if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } @@ -209,7 +209,7 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/crypto/des_generic.c b/crypto/des_generic.c index a71720544d11..1e6621665dd9 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -789,7 +789,7 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, /* Expand to tmp */ ret = des_ekey(tmp, key); - if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } @@ -866,7 +866,7 @@ int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key, if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/crypto/testmgr.c b/crypto/testmgr.c index e4f3f5f688e7..4ac3d22256c3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -706,7 +706,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc, crypto_aead_clear_flags(tfm, ~0); if (template[i].wk) - crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_aead_set_flags(tfm, + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); if (template[i].klen > MAX_KEYLEN) { pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", @@ -820,7 +821,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc, crypto_aead_clear_flags(tfm, ~0); if (template[i].wk) - crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_aead_set_flags(tfm, + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); if (template[i].klen > MAX_KEYLEN) { pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", d, j, algo, template[i].klen, MAX_KEYLEN); @@ -1078,7 +1080,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc, crypto_cipher_clear_flags(tfm, ~0); if (template[i].wk) - crypto_cipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_cipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); ret = crypto_cipher_setkey(tfm, template[i].key, template[i].klen); @@ -1194,8 +1196,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, crypto_skcipher_clear_flags(tfm, ~0); if (template[i].wk) - crypto_skcipher_set_flags(tfm, - CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); ret = crypto_skcipher_setkey(tfm, template[i].key, template[i].klen); @@ -1265,8 +1266,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, j++; crypto_skcipher_clear_flags(tfm, ~0); if (template[i].wk) - crypto_skcipher_set_flags(tfm, - CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); ret = crypto_skcipher_setkey(tfm, template[i].key, template[i].klen); diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 95297240b0f1..d8f6035c7ff2 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -50,7 +50,7 @@ struct hash_testvec { * @ctext: Pointer to ciphertext * @len: Length of @ptext and @ctext in bytes * @fail: If set to one, the test need to fail - * @wk: Does the test need CRYPTO_TFM_REQ_WEAK_KEY + * @wk: Does the test need CRYPTO_TFM_REQ_FORBID_WEAK_KEYS? * ( e.g. test needs to fail due to a weak key ) * @np: numbers of SG to distribute data in (from 1 to MAX_TAP) * @tap: How to distribute data in @np SGs @@ -91,7 +91,7 @@ struct cipher_testvec { * @anp: Numbers of SG to distribute assoc data in * @fail: setkey() failure expected? * @novrfy: Decryption verification failure expected? - * @wk: Does the test need CRYPTO_TFM_REQ_WEAK_KEY? + * @wk: Does the test need CRYPTO_TFM_REQ_FORBID_WEAK_KEYS? * (e.g. setkey() needs to fail due to a weak key) * @klen: Length of @key in bytes * @plen: Length of @ptext in bytes diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 438e1ffb2ec0..65bf1a299562 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -785,7 +785,7 @@ static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, } err = des_ekey(tmp, key); - if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { ctfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 2099d7bcfd44..28f592f7e1b7 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -1818,7 +1818,7 @@ static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, if (keylen == DES_KEY_SIZE) { if (des_ekey(tmp, key) == 0) { if (crypto_ablkcipher_get_flags(cipher) & - CRYPTO_TFM_REQ_WEAK_KEY) { + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) { u32 flags = CRYPTO_TFM_RES_WEAK_KEY; crypto_ablkcipher_set_flags(cipher, flags); @@ -2872,7 +2872,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, if (des_ekey(tmp, keys.enckey) == 0) { if (crypto_aead_get_flags(cipher) & - CRYPTO_TFM_REQ_WEAK_KEY) { + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) { crypto_aead_set_flags(cipher, flags); return -EINVAL; } diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c index ae87b741f9d5..c2ff551d215b 100644 --- a/drivers/crypto/ccp/ccp-crypto-des3.c +++ b/drivers/crypto/ccp/ccp-crypto-des3.c @@ -57,7 +57,7 @@ static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key, if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index e202d7c7ea00..5e3361a363b5 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -352,7 +352,8 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "weak 3DES key"); return -EINVAL; } else if (!des_ekey(tmp, key) && - (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_WEAK_KEY)) { + (crypto_tfm_get_flags(tfm) & + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; dev_dbg(dev, "weak DES key"); return -EINVAL; diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index a5a36fe7bf2c..dad212cabe63 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -1961,7 +1961,8 @@ static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key, u32 tmp[DES_EXPKEY_WORDS]; int ret = des_ekey(tmp, key); - if (unlikely(ret == 0) && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(ret == 0) && + (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index d531c14020dc..7ef30a98cb24 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -940,7 +940,7 @@ static int safexcel_des_setkey(struct crypto_skcipher *ctfm, const u8 *key, } ret = des_ekey(tmp, key); - if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 19fba998b86b..95c1af227bd5 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -847,7 +847,7 @@ static int ablk_setkey(struct crypto_ablkcipher *tfm, const u8 *key, goto out; if (*flags & CRYPTO_TFM_RES_WEAK_KEY) { - if (*flags & CRYPTO_TFM_REQ_WEAK_KEY) { + if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) { ret = -EINVAL; } else { *flags &= ~CRYPTO_TFM_RES_WEAK_KEY; @@ -1125,7 +1125,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) goto out; if (*flags & CRYPTO_TFM_RES_WEAK_KEY) { - if (*flags & CRYPTO_TFM_REQ_WEAK_KEY) { + if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) { ret = -EINVAL; goto out; } else { diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 0ae84ec9e21c..066830dcc53e 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -286,7 +286,7 @@ static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key, } ret = des_ekey(tmp, key); - if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 55f34cfc43ff..9450c41211b2 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -772,7 +772,7 @@ static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, } err = des_ekey(tmp, key); - if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 6369019219d4..1ba2633e90d6 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -662,7 +662,7 @@ static int omap_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, pr_debug("enter, keylen: %d\n", keylen); /* Do we need to test against weak key? */ - if (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY) { + if (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) { u32 tmp[DES_EXPKEY_WORDS]; int ret = des_ekey(tmp, key); diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 17068b55fea5..1b3acdeffede 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -759,7 +759,8 @@ static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, } if (unlikely(!des_ekey(tmp, key)) && - (crypto_ablkcipher_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY)) { + (crypto_ablkcipher_get_flags(cipher) & + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index 25c13e26d012..154b6baa124e 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -180,8 +180,8 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, u32 tmp[DES_EXPKEY_WORDS]; ret = des_ekey(tmp, key); - if (!ret && crypto_ablkcipher_get_flags(ablk) & - CRYPTO_TFM_REQ_WEAK_KEY) + if (!ret && (crypto_ablkcipher_get_flags(ablk) & + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) goto weakkey; } diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index 639c15c5364b..87dd571466c1 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -60,7 +60,7 @@ static int rk_tdes_setkey(struct crypto_ablkcipher *cipher, if (keylen == DES_KEY_SIZE) { if (!des_ekey(tmp, key) && - (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index 5cf64746731a..54fd714d53ca 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -517,7 +517,7 @@ int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key, flags = crypto_skcipher_get_flags(tfm); ret = des_ekey(tmp, key); - if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY); dev_dbg(ss->dev, "Weak key %u\n", keylen); return -EINVAL; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index f8e2c5c3f4eb..de78b54bcfb1 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1535,7 +1535,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, } if (unlikely(crypto_ablkcipher_get_flags(cipher) & - CRYPTO_TFM_REQ_WEAK_KEY) && + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) && !des_ekey(tmp, key)) { crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY); return -EINVAL; diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index db94f89d8d11..3235611928f2 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1000,10 +1000,11 @@ static int des_ablkcipher_setkey(struct crypto_ablkcipher *cipher, } ret = des_ekey(tmp, key); - if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(ret == 0) && + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; - pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_REQ_WEAK_KEY", - __func__); + pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY", + __func__); return -EINVAL; } @@ -1034,18 +1035,19 @@ static int des3_ablkcipher_setkey(struct crypto_ablkcipher *cipher, /* Checking key interdependency for weak key detection. */ if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; - pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_REQ_WEAK_KEY", - __func__); + pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY", + __func__); return -EINVAL; } for (i = 0; i < 3; i++) { ret = des_ekey(tmp, key + i*DES_KEY_SIZE); - if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + if (unlikely(ret == 0) && + (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; - pr_debug(DEV_DBG_NAME " [%s]: " - "CRYPTO_TFM_REQ_WEAK_KEY", __func__); + pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY", + __func__); return -EINVAL; } } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 1e11a683f63d..322ce9686bdb 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -47,7 +47,7 @@ static int derive_key_aes(const u8 *master_key, tfm = NULL; goto out; } - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { res = -ENOMEM; @@ -257,7 +257,7 @@ allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key, mode->friendly_name, crypto_skcipher_alg(tfm)->base.cra_driver_name); } - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); if (err) goto err_free_tfm; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 4dd842f72846..f664da55234e 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -610,7 +610,8 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) full_alg_name); goto out_free; } - crypto_skcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(crypt_stat->tfm, + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); rc = 0; out_free: kfree(full_alg_name); @@ -1590,7 +1591,7 @@ ecryptfs_process_key_cipher(struct crypto_skcipher **key_tfm, "[%s]; rc = [%d]\n", full_alg_name, rc); goto out; } - crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY); + crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); if (*key_size == 0) *key_size = crypto_skcipher_default_keysize(*key_tfm); get_random_bytes(dummy_key, *key_size); diff --git a/include/crypto/xts.h b/include/crypto/xts.h index 34d94c95445a..75fd96ff976b 100644 --- a/include/crypto/xts.h +++ b/include/crypto/xts.h @@ -47,8 +47,8 @@ static inline int xts_verify_key(struct crypto_skcipher *tfm, } /* ensure that the AES and tweak key are not identical */ - if ((fips_enabled || crypto_skcipher_get_flags(tfm) & - CRYPTO_TFM_REQ_WEAK_KEY) && + if ((fips_enabled || (crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) && !crypto_memneq(key, key + (keylen / 2), keylen / 2)) { crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY); return -EINVAL; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c3c98a62e503..f2565a103158 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -118,7 +118,7 @@ #define CRYPTO_TFM_REQ_MASK 0x000fff00 #define CRYPTO_TFM_RES_MASK 0xfff00000 -#define CRYPTO_TFM_REQ_WEAK_KEY 0x00000100 +#define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 #define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 -- cgit v1.2.3 From 275f22148e8720e84b180d9e0cdf8abfd69bac5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 31 Dec 2018 22:22:40 +0100 Subject: ipc: rename old-style shmctl/semctl/msgctl syscalls The behavior of these system calls is slightly different between architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION symbol. Most architectures that implement the split IPC syscalls don't set that symbol and only get the modern version, but alpha, arm, microblaze, mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag. For the architectures that so far only implement sys_ipc(), i.e. m68k, mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior when adding the split syscalls, so we need to distinguish between the two groups of architectures. The method I picked for this distinction is to have a separate system call entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl() does not. The system call tables of the five architectures are changed accordingly. As an additional benefit, we no longer need the configuration specific definition for ipc_parse_version(), it always does the same thing now, but simply won't get called on architectures with the modern interface. A small downside is that on architectures that do set ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points that are never called. They only add a few bytes of bloat, so it seems better to keep them compared to adding yet another Kconfig symbol. I considered adding new syscall numbers for the IPC_64 variants for consistency, but decided against that for now. Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 6 ++--- arch/arm/tools/syscall.tbl | 6 ++--- arch/arm64/include/asm/unistd32.h | 6 ++--- arch/microblaze/kernel/syscalls/syscall.tbl | 6 ++--- arch/mips/kernel/syscalls/syscall_n32.tbl | 6 ++--- arch/mips/kernel/syscalls/syscall_n64.tbl | 6 ++--- arch/xtensa/kernel/syscalls/syscall.tbl | 6 ++--- include/linux/syscalls.h | 3 +++ ipc/msg.c | 39 +++++++++++++++++++++++----- ipc/sem.c | 39 +++++++++++++++++++++++----- ipc/shm.c | 40 ++++++++++++++++++++++++----- ipc/syscall.c | 12 ++++----- ipc/util.h | 21 +++++---------- kernel/sys_ni.c | 3 +++ 14 files changed, 137 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index f920b65e8c49..b0e247287908 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -174,17 +174,17 @@ 187 common osf_alt_sigpending sys_ni_syscall 188 common osf_alt_setsid sys_ni_syscall 199 common osf_swapon sys_swapon -200 common msgctl sys_msgctl +200 common msgctl sys_old_msgctl 201 common msgget sys_msgget 202 common msgrcv sys_msgrcv 203 common msgsnd sys_msgsnd -204 common semctl sys_semctl +204 common semctl sys_old_semctl 205 common semget sys_semget 206 common semop sys_semop 207 common osf_utsname sys_osf_utsname 208 common lchown sys_lchown 209 common shmat sys_shmat -210 common shmctl sys_shmctl +210 common shmctl sys_old_shmctl 211 common shmdt sys_shmdt 212 common shmget sys_shmget 213 common osf_mvalid sys_ni_syscall diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 20ed7e026723..b54b7f2bc24a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -314,15 +314,15 @@ 297 common recvmsg sys_recvmsg 298 common semop sys_semop sys_oabi_semop 299 common semget sys_semget -300 common semctl sys_semctl +300 common semctl sys_old_semctl 301 common msgsnd sys_msgsnd 302 common msgrcv sys_msgrcv 303 common msgget sys_msgget -304 common msgctl sys_msgctl +304 common msgctl sys_old_msgctl 305 common shmat sys_shmat 306 common shmdt sys_shmdt 307 common shmget sys_shmget -308 common shmctl sys_shmctl +308 common shmctl sys_old_shmctl 309 common add_key sys_add_key 310 common request_key sys_request_key 311 common keyctl sys_keyctl diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 8ca1d4c304f4..d10cce69a4b0 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -622,7 +622,7 @@ __SYSCALL(__NR_semop, sys_semop) #define __NR_semget 299 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 300 -__SYSCALL(__NR_semctl, compat_sys_semctl) +__SYSCALL(__NR_semctl, compat_sys_old_semctl) #define __NR_msgsnd 301 __SYSCALL(__NR_msgsnd, compat_sys_msgsnd) #define __NR_msgrcv 302 @@ -630,7 +630,7 @@ __SYSCALL(__NR_msgrcv, compat_sys_msgrcv) #define __NR_msgget 303 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 304 -__SYSCALL(__NR_msgctl, compat_sys_msgctl) +__SYSCALL(__NR_msgctl, compat_sys_old_msgctl) #define __NR_shmat 305 __SYSCALL(__NR_shmat, compat_sys_shmat) #define __NR_shmdt 306 @@ -638,7 +638,7 @@ __SYSCALL(__NR_shmdt, sys_shmdt) #define __NR_shmget 307 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 308 -__SYSCALL(__NR_shmctl, compat_sys_shmctl) +__SYSCALL(__NR_shmctl, compat_sys_old_shmctl) #define __NR_add_key 309 __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 310 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a24d09e937dd..7cc0f9554da3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -335,15 +335,15 @@ 325 common semtimedop sys_semtimedop 326 common timerfd_settime sys_timerfd_settime 327 common timerfd_gettime sys_timerfd_gettime -328 common semctl sys_semctl +328 common semctl sys_old_semctl 329 common semget sys_semget 330 common semop sys_semop -331 common msgctl sys_msgctl +331 common msgctl sys_old_msgctl 332 common msgget sys_msgget 333 common msgrcv sys_msgrcv 334 common msgsnd sys_msgsnd 335 common shmat sys_shmat -336 common shmctl sys_shmctl +336 common shmctl sys_old_shmctl 337 common shmdt sys_shmdt 338 common shmget sys_shmget 339 common signalfd4 sys_signalfd4 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 53d5862649ae..cc134b1211aa 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -37,7 +37,7 @@ 27 n32 madvise sys_madvise 28 n32 shmget sys_shmget 29 n32 shmat sys_shmat -30 n32 shmctl compat_sys_shmctl +30 n32 shmctl compat_sys_old_shmctl 31 n32 dup sys_dup 32 n32 dup2 sys_dup2 33 n32 pause sys_pause @@ -71,12 +71,12 @@ 61 n32 uname sys_newuname 62 n32 semget sys_semget 63 n32 semop sys_semop -64 n32 semctl compat_sys_semctl +64 n32 semctl compat_sys_old_semctl 65 n32 shmdt sys_shmdt 66 n32 msgget sys_msgget 67 n32 msgsnd compat_sys_msgsnd 68 n32 msgrcv compat_sys_msgrcv -69 n32 msgctl compat_sys_msgctl +69 n32 msgctl compat_sys_old_msgctl 70 n32 fcntl compat_sys_fcntl 71 n32 flock sys_flock 72 n32 fsync sys_fsync diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index a8286ccbb66c..af0da757a7b2 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -37,7 +37,7 @@ 27 n64 madvise sys_madvise 28 n64 shmget sys_shmget 29 n64 shmat sys_shmat -30 n64 shmctl sys_shmctl +30 n64 shmctl sys_old_shmctl 31 n64 dup sys_dup 32 n64 dup2 sys_dup2 33 n64 pause sys_pause @@ -71,12 +71,12 @@ 61 n64 uname sys_newuname 62 n64 semget sys_semget 63 n64 semop sys_semop -64 n64 semctl sys_semctl +64 n64 semctl sys_old_semctl 65 n64 shmdt sys_shmdt 66 n64 msgget sys_msgget 67 n64 msgsnd sys_msgsnd 68 n64 msgrcv sys_msgrcv -69 n64 msgctl sys_msgctl +69 n64 msgctl sys_old_msgctl 70 n64 fcntl sys_fcntl 71 n64 flock sys_flock 72 n64 fsync sys_fsync diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 69cf91b03b26..f8befa11b0c4 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -103,7 +103,7 @@ 91 common madvise sys_madvise 92 common shmget sys_shmget 93 common shmat xtensa_shmat -94 common shmctl sys_shmctl +94 common shmctl sys_old_shmctl 95 common shmdt sys_shmdt # Socket Operations 96 common socket sys_socket @@ -177,12 +177,12 @@ 161 common semtimedop sys_semtimedop 162 common semget sys_semget 163 common semop sys_semop -164 common semctl sys_semctl +164 common semctl sys_old_semctl 165 common available165 sys_ni_syscall 166 common msgget sys_msgget 167 common msgsnd sys_msgsnd 168 common msgrcv sys_msgrcv -169 common msgctl sys_msgctl +169 common msgctl sys_old_msgctl 170 common available170 sys_ni_syscall # File System 171 common umount2 sys_umount diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fb63045a0fb6..938d8908b9e0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -717,6 +717,7 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqst /* ipc/msg.c */ asmlinkage long sys_msgget(key_t key, int msgflg); +asmlinkage long sys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); @@ -726,6 +727,7 @@ asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, /* ipc/sem.c */ asmlinkage long sys_semget(key_t key, int nsems, int semflg); asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); +asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct __kernel_timespec __user *timeout); @@ -734,6 +736,7 @@ asmlinkage long sys_semop(int semid, struct sembuf __user *sops, /* ipc/shm.c */ asmlinkage long sys_shmget(key_t key, size_t size, int flag); +asmlinkage long sys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); asmlinkage long sys_shmdt(char __user *shmaddr); diff --git a/ipc/msg.c b/ipc/msg.c index 0833c6405915..8dec945fa030 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -567,9 +567,8 @@ out_unlock: return err; } -long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int version) { - int version; struct ipc_namespace *ns; struct msqid64_ds msqid64; int err; @@ -577,7 +576,6 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) if (msqid < 0 || cmd < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -613,9 +611,23 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) { - return ksys_msgctl(msqid, cmd, buf); + return ksys_msgctl(msqid, cmd, buf, IPC_64); } +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +{ + int version = ipc_parse_version(&cmd); + + return ksys_msgctl(msqid, cmd, buf, version); +} + +SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + return ksys_old_msgctl(msqid, cmd, buf); +} +#endif + #ifdef CONFIG_COMPAT struct compat_msqid_ds { @@ -689,12 +701,11 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in, } } -long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) +static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; int err; struct msqid64_ds msqid64; - int version = compat_ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; @@ -734,8 +745,22 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr) COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr) { - return compat_ksys_msgctl(msqid, cmd, uptr); + return compat_ksys_msgctl(msqid, cmd, uptr, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_msgctl(msqid, cmd, uptr, version); +} + +COMPAT_SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, void __user *, uptr) +{ + return compat_ksys_old_msgctl(msqid, cmd, uptr); +} +#endif #endif static int testmsg(struct msg_msg *msg, long type, int mode) diff --git a/ipc/sem.c b/ipc/sem.c index 745dc6187e84..d1efff3a81bb 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1634,9 +1634,8 @@ out_up: return err; } -long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) +static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version) { - int version; struct ipc_namespace *ns; void __user *p = (void __user *)arg; struct semid64_ds semid64; @@ -1645,7 +1644,6 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) if (semid < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -1691,9 +1689,23 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg) SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) { - return ksys_semctl(semid, semnum, cmd, arg); + return ksys_semctl(semid, semnum, cmd, arg, IPC_64); } +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg) +{ + int version = ipc_parse_version(&cmd); + + return ksys_semctl(semid, semnum, cmd, arg, version); +} + +SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) +{ + return ksys_old_semctl(semid, semnum, cmd, arg); +} +#endif + #ifdef CONFIG_COMPAT struct compat_semid_ds { @@ -1744,12 +1756,11 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, } } -long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) +static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version) { void __user *p = compat_ptr(arg); struct ipc_namespace *ns; struct semid64_ds semid64; - int version = compat_ipc_parse_version(&cmd); int err; ns = current->nsproxy->ipc_ns; @@ -1792,8 +1803,22 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg) COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) { - return compat_ksys_semctl(semid, semnum, cmd, arg); + return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_semctl(semid, semnum, cmd, arg, version); +} + +COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg) +{ + return compat_ksys_old_semctl(semid, semnum, cmd, arg); +} +#endif #endif /* If the task doesn't already have a undo_list, then allocate one diff --git a/ipc/shm.c b/ipc/shm.c index 0842411cb0e9..ce1ca9f7c6e9 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1137,16 +1137,15 @@ out_unlock1: return err; } -long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) +static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version) { - int err, version; + int err; struct ipc_namespace *ns; struct shmid64_ds sem64; if (cmd < 0 || shmid < 0) return -EINVAL; - version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; switch (cmd) { @@ -1194,8 +1193,22 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { - return ksys_shmctl(shmid, cmd, buf); + return ksys_shmctl(shmid, cmd, buf, IPC_64); +} + +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION +long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) +{ + int version = ipc_parse_version(&cmd); + + return ksys_shmctl(shmid, cmd, buf, version); +} + +SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + return ksys_old_shmctl(shmid, cmd, buf); } +#endif #ifdef CONFIG_COMPAT @@ -1319,11 +1332,10 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, } } -long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) +long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version) { struct ipc_namespace *ns; struct shmid64_ds sem64; - int version = compat_ipc_parse_version(&cmd); int err; ns = current->nsproxy->ipc_ns; @@ -1378,8 +1390,22 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) { - return compat_ksys_shmctl(shmid, cmd, uptr); + return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64); } + +#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION +long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr) +{ + int version = compat_ipc_parse_version(&cmd); + + return compat_ksys_shmctl(shmid, cmd, uptr, version); +} + +COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) +{ + return compat_ksys_old_shmctl(shmid, cmd, uptr); +} +#endif #endif /* diff --git a/ipc/syscall.c b/ipc/syscall.c index 3cf8ad703a4d..581bdff4e7c5 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -47,7 +47,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, return -EINVAL; if (get_user(arg, (unsigned long __user *) ptr)) return -EFAULT; - return ksys_semctl(first, second, third, arg); + return ksys_old_semctl(first, second, third, arg); } case MSGSND: @@ -75,7 +75,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, case MSGGET: return ksys_msgget((key_t) first, second); case MSGCTL: - return ksys_msgctl(first, second, + return ksys_old_msgctl(first, second, (struct msqid_ds __user *)ptr); case SHMAT: @@ -100,7 +100,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second, case SHMGET: return ksys_shmget(first, second, third); case SHMCTL: - return ksys_shmctl(first, second, + return ksys_old_shmctl(first, second, (struct shmid_ds __user *) ptr); default: return -ENOSYS; @@ -152,7 +152,7 @@ int compat_ksys_ipc(u32 call, int first, int second, return -EINVAL; if (get_user(pad, (u32 __user *) compat_ptr(ptr))) return -EFAULT; - return compat_ksys_semctl(first, second, third, pad); + return compat_ksys_old_semctl(first, second, third, pad); case MSGSND: return compat_ksys_msgsnd(first, ptr, second, third); @@ -177,7 +177,7 @@ int compat_ksys_ipc(u32 call, int first, int second, case MSGGET: return ksys_msgget(first, second); case MSGCTL: - return compat_ksys_msgctl(first, second, compat_ptr(ptr)); + return compat_ksys_old_msgctl(first, second, compat_ptr(ptr)); case SHMAT: { int err; @@ -196,7 +196,7 @@ int compat_ksys_ipc(u32 call, int first, int second, case SHMGET: return ksys_shmget(first, (unsigned int)second, third); case SHMCTL: - return compat_ksys_shmctl(first, second, compat_ptr(ptr)); + return compat_ksys_old_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff --git a/ipc/util.h b/ipc/util.h index d768fdbed515..e272be622ae7 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -160,10 +160,7 @@ static inline void ipc_update_pid(struct pid **pos, struct pid *pid) } } -#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION -/* On IA-64, we always use the "64-bit version" of the IPC structures. */ -# define ipc_parse_version(cmd) IPC_64 -#else +#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION int ipc_parse_version(int *cmd); #endif @@ -246,13 +243,9 @@ int get_compat_ipc64_perm(struct ipc64_perm *, static inline int compat_ipc_parse_version(int *cmd) { -#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION int version = *cmd & IPC_64; *cmd &= ~IPC_64; return version; -#else - return IPC_64; -#endif } #endif @@ -261,29 +254,29 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops, unsigned int nsops, const struct __kernel_timespec __user *timeout); long ksys_semget(key_t key, int nsems, int semflg); -long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg); +long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); long ksys_msgget(key_t key, int msgflg); -long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg); long ksys_shmget(key_t key, size_t size, int shmflg); long ksys_shmdt(char __user *shmaddr); -long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); +long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, unsigned int nsops, const struct old_timespec32 __user *timeout); #ifdef CONFIG_COMPAT -long compat_ksys_semctl(int semid, int semnum, int cmd, int arg); -long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr); +long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg); +long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); -long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr); +long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr); #endif /* CONFIG_COMPAT */ #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bc934f31ab10..ce04431a40d1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -197,6 +197,7 @@ COND_SYSCALL_COMPAT(mq_getsetattr); /* ipc/msg.c */ COND_SYSCALL(msgget); +COND_SYSCALL(old_msgctl); COND_SYSCALL(msgctl); COND_SYSCALL_COMPAT(msgctl); COND_SYSCALL(msgrcv); @@ -206,6 +207,7 @@ COND_SYSCALL_COMPAT(msgsnd); /* ipc/sem.c */ COND_SYSCALL(semget); +COND_SYSCALL(old_semctl); COND_SYSCALL(semctl); COND_SYSCALL_COMPAT(semctl); COND_SYSCALL(semtimedop); @@ -214,6 +216,7 @@ COND_SYSCALL(semop); /* ipc/shm.c */ COND_SYSCALL(shmget); +COND_SYSCALL(old_shmctl); COND_SYSCALL(shmctl); COND_SYSCALL_COMPAT(shmctl); COND_SYSCALL(shmat); -- cgit v1.2.3 From 4b7d248b3a1de483ffe9d05c1debbf32a544164d Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Jan 2019 17:06:39 -0500 Subject: audit: move loginuid and sessionid from CONFIG_AUDITSYSCALL to CONFIG_AUDIT loginuid and sessionid (and audit_log_session_info) should be part of CONFIG_AUDIT scope and not CONFIG_AUDITSYSCALL since it is used in CONFIG_CHANGE, ANOM_LINK, FEATURE_CHANGE (and INTEGRITY_RULE), none of which are otherwise dependent on AUDITSYSCALL. Please see github issue https://github.com/linux-audit/audit-kernel/issues/104 Signed-off-by: Richard Guy Briggs [PM: tweaked subject line for better grep'ing] Signed-off-by: Paul Moore --- fs/proc/base.c | 6 ++-- include/linux/audit.h | 42 +++++++++++++------------ include/linux/sched.h | 2 +- init/init_task.c | 2 +- kernel/audit.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/auditsc.c | 84 -------------------------------------------------- 6 files changed, 113 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 633a63462573..a23651ce6960 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1210,7 +1210,7 @@ static const struct file_operations proc_oom_score_adj_operations = { .llseek = default_llseek, }; -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) @@ -3002,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif @@ -3390,7 +3390,7 @@ static const struct pid_entry tid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif diff --git a/include/linux/audit.h b/include/linux/audit.h index a625c29a2ea2..ecb5d317d6a2 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -159,6 +159,18 @@ extern int audit_update_lsm_rules(void); extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); +extern int audit_set_loginuid(kuid_t loginuid); + +static inline kuid_t audit_get_loginuid(struct task_struct *tsk) +{ + return tsk->loginuid; +} + +static inline unsigned int audit_get_sessionid(struct task_struct *tsk) +{ + return tsk->sessionid; +} + extern u32 audit_enabled; #else /* CONFIG_AUDIT */ static inline __printf(4, 5) @@ -201,6 +213,17 @@ static inline int audit_log_task_context(struct audit_buffer *ab) } static inline void audit_log_task_info(struct audit_buffer *ab) { } + +static inline kuid_t audit_get_loginuid(struct task_struct *tsk) +{ + return INVALID_UID; +} + +static inline unsigned int audit_get_sessionid(struct task_struct *tsk) +{ + return AUDIT_SID_UNSET; +} + #define audit_enabled AUDIT_OFF #endif /* CONFIG_AUDIT */ @@ -323,17 +346,6 @@ static inline void audit_ptrace(struct task_struct *t) extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial); -extern int audit_set_loginuid(kuid_t loginuid); - -static inline kuid_t audit_get_loginuid(struct task_struct *tsk) -{ - return tsk->loginuid; -} - -static inline unsigned int audit_get_sessionid(struct task_struct *tsk) -{ - return tsk->sessionid; -} extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); @@ -519,14 +531,6 @@ static inline int auditsc_get_stamp(struct audit_context *ctx, { return 0; } -static inline kuid_t audit_get_loginuid(struct task_struct *tsk) -{ - return INVALID_UID; -} -static inline unsigned int audit_get_sessionid(struct task_struct *tsk) -{ - return AUDIT_SID_UNSET; -} static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, diff --git a/include/linux/sched.h b/include/linux/sched.h index 89541d248893..f9788bb122c5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -886,7 +886,7 @@ struct task_struct { struct callback_head *task_works; struct audit_context *audit_context; -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT kuid_t loginuid; unsigned int sessionid; #endif diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..39c3109acc1a 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -121,7 +121,7 @@ struct task_struct init_task .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -#ifdef CONFIG_AUDITSYSCALL +#ifdef CONFIG_AUDIT .loginuid = INVALID_UID, .sessionid = AUDIT_SID_UNSET, #endif diff --git a/kernel/audit.c b/kernel/audit.c index c2a7662cc254..2a32f304223d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2335,6 +2335,91 @@ void audit_log_link_denied(const char *operation) audit_log_end(ab); } +/* global counter which is incremented every time something logs in */ +static atomic_t session_id = ATOMIC_INIT(0); + +static int audit_set_loginuid_perm(kuid_t loginuid) +{ + /* if we are unset, we don't need privs */ + if (!audit_loginuid_set(current)) + return 0; + /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ + if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) + return -EPERM; + /* it is set, you need permission */ + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + /* reject if this is not an unset and we don't allow that */ + if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) + && uid_valid(loginuid)) + return -EPERM; + return 0; +} + +static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, + unsigned int oldsessionid, + unsigned int sessionid, int rc) +{ + struct audit_buffer *ab; + uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; + + if (!audit_enabled) + return; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + + uid = from_kuid(&init_user_ns, task_uid(current)); + oldloginuid = from_kuid(&init_user_ns, koldloginuid); + loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(); + + audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); + audit_log_task_context(ab); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); + audit_log_end(ab); +} + +/** + * audit_set_loginuid - set current task's loginuid + * @loginuid: loginuid value + * + * Returns 0. + * + * Called (set) from fs/proc/base.c::proc_loginuid_write(). + */ +int audit_set_loginuid(kuid_t loginuid) +{ + unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; + kuid_t oldloginuid; + int rc; + + oldloginuid = audit_get_loginuid(current); + oldsessionid = audit_get_sessionid(current); + + rc = audit_set_loginuid_perm(loginuid); + if (rc) + goto out; + + /* are we setting or clearing? */ + if (uid_valid(loginuid)) { + sessionid = (unsigned int)atomic_inc_return(&session_id); + if (unlikely(sessionid == AUDIT_SID_UNSET)) + sessionid = (unsigned int)atomic_inc_return(&session_id); + } + + current->sessionid = sessionid; + current->loginuid = loginuid; +out: + audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); + return rc; +} + /** * audit_log_end - end one audit record * @ab: the audit_buffer diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b585ceb2f7a2..572d247957fb 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1983,90 +1983,6 @@ int auditsc_get_stamp(struct audit_context *ctx, return 1; } -/* global counter which is incremented every time something logs in */ -static atomic_t session_id = ATOMIC_INIT(0); - -static int audit_set_loginuid_perm(kuid_t loginuid) -{ - /* if we are unset, we don't need privs */ - if (!audit_loginuid_set(current)) - return 0; - /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ - if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) - return -EPERM; - /* it is set, you need permission */ - if (!capable(CAP_AUDIT_CONTROL)) - return -EPERM; - /* reject if this is not an unset and we don't allow that */ - if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) - return -EPERM; - return 0; -} - -static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, - unsigned int oldsessionid, unsigned int sessionid, - int rc) -{ - struct audit_buffer *ab; - uid_t uid, oldloginuid, loginuid; - struct tty_struct *tty; - - if (!audit_enabled) - return; - - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (!ab) - return; - - uid = from_kuid(&init_user_ns, task_uid(current)); - oldloginuid = from_kuid(&init_user_ns, koldloginuid); - loginuid = from_kuid(&init_user_ns, kloginuid), - tty = audit_get_tty(); - - audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); - audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, tty ? tty_name(tty) : "(none)", - oldsessionid, sessionid, !rc); - audit_put_tty(tty); - audit_log_end(ab); -} - -/** - * audit_set_loginuid - set current task's audit_context loginuid - * @loginuid: loginuid value - * - * Returns 0. - * - * Called (set) from fs/proc/base.c::proc_loginuid_write(). - */ -int audit_set_loginuid(kuid_t loginuid) -{ - unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; - kuid_t oldloginuid; - int rc; - - oldloginuid = audit_get_loginuid(current); - oldsessionid = audit_get_sessionid(current); - - rc = audit_set_loginuid_perm(loginuid); - if (rc) - goto out; - - /* are we setting or clearing? */ - if (uid_valid(loginuid)) { - sessionid = (unsigned int)atomic_inc_return(&session_id); - if (unlikely(sessionid == AUDIT_SID_UNSET)) - sessionid = (unsigned int)atomic_inc_return(&session_id); - } - - current->sessionid = sessionid; - current->loginuid = loginuid; -out: - audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); - return rc; -} - /** * __audit_mq_open - record audit data for a POSIX MQ open * @oflag: open flag -- cgit v1.2.3 From 2fec30e245a3b46fef89c4cb1f74eefc5fbb29a6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 23 Jan 2019 21:36:25 -0500 Subject: audit: add support for fcaps v3 V3 namespaced file capabilities were introduced in commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Add support for these by adding the "frootid" field to the existing fcaps fields in the NAME and BPRM_FCAPS records. Please see github issue https://github.com/linux-audit/audit-kernel/issues/103 Signed-off-by: Richard Guy Briggs Acked-by: Serge Hallyn [PM: comment tweak to fit an 80 char line width] Signed-off-by: Paul Moore --- include/linux/capability.h | 5 +++-- kernel/audit.c | 6 ++++-- kernel/audit.h | 1 + kernel/auditsc.c | 4 ++++ security/commoncap.c | 2 ++ 5 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index f640dcbc880c..b769330e9380 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -14,7 +14,7 @@ #define _LINUX_CAPABILITY_H #include - +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 @@ -25,11 +25,12 @@ typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; -/* exact same as vfs_cap_data but in cpu endian and always filled completely */ +/* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; kernel_cap_t permitted; kernel_cap_t inheritable; + kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) diff --git a/kernel/audit.c b/kernel/audit.c index 2a32f304223d..3f3f1888cac7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2084,8 +2084,9 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) { audit_log_cap(ab, "cap_fp", &name->fcap.permitted); audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); - audit_log_format(ab, " cap_fe=%d cap_fver=%x", - name->fcap.fE, name->fcap_ver); + audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", + name->fcap.fE, name->fcap_ver, + from_kuid(&init_user_ns, name->fcap.rootid)); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2104,6 +2105,7 @@ static inline int audit_copy_fcaps(struct audit_names *name, name->fcap.permitted = caps.permitted; name->fcap.inheritable = caps.inheritable; name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap.rootid = caps.rootid; name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; diff --git a/kernel/audit.h b/kernel/audit.h index 6ffb70575082..deefdbe61a47 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -69,6 +69,7 @@ struct audit_cap_data { kernel_cap_t effective; /* effective set of process */ }; kernel_cap_t ambient; + kuid_t rootid; }; /* When fs/namei.c:getname() is called, we store the pointer in name and bump diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 572d247957fb..c16beb25fd0a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1358,6 +1358,9 @@ static void audit_log_exit(void) audit_log_cap(ab, "pi", &axs->new_pcap.inheritable); audit_log_cap(ab, "pe", &axs->new_pcap.effective); audit_log_cap(ab, "pa", &axs->new_pcap.ambient); + audit_log_format(ab, " frootid=%d", + from_kuid(&init_user_ns, + axs->fcap.rootid)); break; } } @@ -2271,6 +2274,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + ax->fcap.rootid = vcaps.rootid; ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = old->cap_permitted; diff --git a/security/commoncap.c b/security/commoncap.c index 232db019f051..c097f3568001 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -643,6 +643,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->rootid = rootkuid; + return 0; } -- cgit v1.2.3 From 40852275a94afb3e836be9248399e036982d1a79 Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Tue, 22 Jan 2019 14:42:09 -0800 Subject: LSM: add SafeSetID module that gates setid calls This change ensures that the set*uid family of syscalls in kernel/sys.c (setreuid, setuid, setresuid, setfsuid) all call ns_capable_common with the CAP_OPT_INSETID flag, so capability checks in the security_capable hook can know whether they are being called from within a set*uid syscall. This change is a no-op by itself, but is needed for the proposed SafeSetID LSM. Signed-off-by: Micah Morton Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/capability.h | 5 +++++ kernel/capability.c | 19 +++++++++++++++++++ kernel/sys.c | 10 +++++----- 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index f640dcbc880c..c3f9a4d558a0 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -209,6 +209,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); +extern bool ns_capable_setid(struct user_namespace *ns, int cap); #else static inline bool has_capability(struct task_struct *t, int cap) { @@ -240,6 +241,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) { return true; } +static inline bool ns_capable_setid(struct user_namespace *ns, int cap) +{ + return true; +} #endif /* CONFIG_MULTIUSER */ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); diff --git a/kernel/capability.c b/kernel/capability.c index cfbbcb68e11e..1444f3954d75 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -415,6 +415,25 @@ bool ns_capable_noaudit(struct user_namespace *ns, int cap) } EXPORT_SYMBOL(ns_capable_noaudit); +/** + * ns_capable_setid - Determine if the current task has a superior capability + * in effect, while signalling that this check is being done from within a + * setid syscall. + * @ns: The usernamespace we want the capability in + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. + * + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. + */ +bool ns_capable_setid(struct user_namespace *ns, int cap) +{ + return ns_capable_common(ns, cap, CAP_OPT_INSETID); +} +EXPORT_SYMBOL(ns_capable_setid); + /** * capable - Determine if the current task has a superior capability in effect * @cap: The capability to be tested for diff --git a/kernel/sys.c b/kernel/sys.c index f7eb62eceb24..c5f875048aef 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -516,7 +516,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) new->uid = kruid; if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && - !ns_capable(old->user_ns, CAP_SETUID)) + !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } @@ -525,7 +525,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && - !ns_capable(old->user_ns, CAP_SETUID)) + !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } @@ -584,7 +584,7 @@ long __sys_setuid(uid_t uid) old = current_cred(); retval = -EPERM; - if (ns_capable(old->user_ns, CAP_SETUID)) { + if (ns_capable_setid(old->user_ns, CAP_SETUID)) { new->suid = new->uid = kuid; if (!uid_eq(kuid, old->uid)) { retval = set_user(new); @@ -646,7 +646,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) old = current_cred(); retval = -EPERM; - if (!ns_capable(old->user_ns, CAP_SETUID)) { + if (!ns_capable_setid(old->user_ns, CAP_SETUID)) { if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) goto error; @@ -814,7 +814,7 @@ long __sys_setfsuid(uid_t uid) if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || - ns_capable(old->user_ns, CAP_SETUID)) { + ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { new->fsuid = kuid; if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) -- cgit v1.2.3 From 6ba7d681aca22e53385bdb35b1d7662e61905760 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 15:22:03 -0800 Subject: rcu: Remove wrapper definitions for obsolete RCU update functions None of synchronize_rcu_bh, synchronize_rcu_bh_expedited, call_rcu_bh, rcu_barrier_bh, synchronize_sched, synchronize_sched_expedited, call_rcu_sched, rcu_barrier_sched, get_state_synchronize_sched, and cond_synchronize_sched are actually used. This commit therefore removes their trivial wrapper-function definitions. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 53 ------------------------------------------------ 1 file changed, 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4db8bcacc51a..0e39e0d2629e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -896,57 +896,4 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) return false; } - -/* Transitional pre-consolidation compatibility definitions. */ - -static inline void synchronize_rcu_bh(void) -{ - synchronize_rcu(); -} - -static inline void synchronize_rcu_bh_expedited(void) -{ - synchronize_rcu_expedited(); -} - -static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - -static inline void rcu_barrier_bh(void) -{ - rcu_barrier(); -} - -static inline void synchronize_sched(void) -{ - synchronize_rcu(); -} - -static inline void synchronize_sched_expedited(void) -{ - synchronize_rcu_expedited(); -} - -static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - -static inline void rcu_barrier_sched(void) -{ - rcu_barrier(); -} - -static inline unsigned long get_state_synchronize_sched(void) -{ - return get_state_synchronize_rcu(); -} - -static inline void cond_synchronize_sched(unsigned long oldstate) -{ - cond_synchronize_rcu(oldstate); -} - #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3 From 2aa5503026ceaa8860697b93c9e5bbbcd025ba89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Nov 2018 08:29:35 -0800 Subject: rcu: Docbook for rcu_head_init() and rcu_head_after_call_rcu() This commit adds the missing asterisks required to make Sphinx pick up the current header comments for these two functions. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0e39e0d2629e..632113946757 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -859,7 +859,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /* Has the specified rcu_head structure been handed to call_rcu()? */ -/* +/** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * @@ -874,10 +874,10 @@ static inline void rcu_head_init(struct rcu_head *rhp) rhp->func = (rcu_callback_t)~0L; } -/* +/** * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. - * @func: The function passed to call_rcu() along with @rhp. + * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including -- cgit v1.2.3 From c98cac603f1ce7d00e2a802b5640bced3bc3c1f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Nov 2018 11:35:03 -0800 Subject: rcu: Rename rcu_check_callbacks() to rcu_sched_clock_irq() The name rcu_check_callbacks() arguably made sense back in the early 2000s when RCU was quite a bit simpler than it is today, but it has become quite misleading, especially with the advent of dyntick-idle and NO_HZ_FULL. The rcu_check_callbacks() function is RCU's hook into the scheduling-clock interrupt, and is now but one of many ways that callbacks get promoted to invocable state. This commit therefore changes the name to rcu_sched_clock_irq(), which is the same number of characters and clearly indicates this function's relation to the rest of the Linux kernel. In addition, for the sake of consistency, rcu_flavor_check_callbacks() is also renamed to rcu_flavor_sched_clock_irq(). While in the area, the header comments for both functions are reworked. Signed-off-by: Paul E. McKenney --- .../Memory-Ordering/Tree-RCU-Memory-Ordering.html | 4 ++-- .../TreeRCU-callback-invocation.svg | 2 +- .../RCU/Design/Memory-Ordering/TreeRCU-gp.svg | 4 ++-- .../RCU/Design/Memory-Ordering/TreeRCU-qs.svg | 2 +- include/linux/rcupdate.h | 2 +- kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 18 ++++++++-------- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 24 +++++++++------------- kernel/time/timer.c | 2 +- 10 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html index e4d94fba6c89..a3acfd49255f 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html @@ -485,7 +485,7 @@ section that the grace period must wait on. noted by rcu_node_context_switch() on the left. On the other hand, if the CPU takes a scheduler-clock interrupt while executing in usermode, a quiescent state will be noted by -rcu_check_callbacks() on the right. +rcu_sched_clock_irq() on the right. Either way, the passage through a quiescent state will be noted in a per-CPU variable. @@ -651,7 +651,7 @@ to end. These callbacks are identified by rcu_advance_cbs(), which is usually invoked by __note_gp_changes(). As shown in the diagram below, this invocation can be triggered by -the scheduling-clock interrupt (rcu_check_callbacks() on +the scheduling-clock interrupt (rcu_sched_clock_irq() on the left) or by idle entry (rcu_cleanup_after_idle() on the right, but only for kernels build with CONFIG_RCU_FAST_NO_HZ=y). diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg index 832408313d93..3fcf0c17cef2 100644 --- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg +++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg @@ -349,7 +349,7 @@ font-weight="bold" font-size="192" id="text202-7-5" - style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq() rcu_check_callbacks() + xml:space="preserve">rcu_sched_clock_irq() rcu_check_callbacks() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq() rcu_check_callbacks() + xml:space="preserve">rcu_sched_clock_irq() rcu_read_unlock_special.b.need_qs = false; } } @@ -778,13 +778,13 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check for a quiescent state from the current CPU. When a task blocks, - * the task is recorded in the corresponding CPU's rcu_node structure, - * which is checked elsewhere. - * - * Caller must disable hard irqs. + * Check for a quiescent state from the current CPU, including voluntary + * context switches for Tasks RCU. When a task blocks, the task is + * recorded in the corresponding CPU's rcu_node structure, which is checked + * elsewhere, hence this function need only check for quiescent states + * related to the current CPU, not to those related to tasks. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { struct task_struct *t = current; @@ -1030,14 +1030,10 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check to see if this CPU is in a non-context-switch quiescent state - * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). - * Also schedule RCU core processing. - * - * This function must be called from hardirq context. It is normally - * invoked from the scheduling-clock interrupt. + * Check to see if this CPU is in a non-context-switch quiescent state, + * namely user mode and idle loop. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { if (user || rcu_is_cpu_rrupt_from_idle()) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 444156debfa0..6eb7cc4b6d52 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1632,7 +1632,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); - rcu_check_callbacks(user_tick); + rcu_sched_clock_irq(user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_tick(); -- cgit v1.2.3 From 423a86a610cad121742ebe698ef98a3b4c87b5dd Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 12 Dec 2018 14:37:10 -0800 Subject: rcu: Add sparse check to rcu_assign_pointer() The rcu_assign_pointer() function currently doesn't do any sparse checking on the assigned-to pointer. So its possible that a pointer that is not __rcu annotated is assigned with rcu_assign_pointer without sparse complaints. Because rcu_dereference() already does such checking, this commit makes rcu_assign_pointer() to do the same. The extra error could be helpful in cases where an RCU pointer is assigned with rcu_assign_pointer() but not annotated with __rcu. This doesn't generate any code in the normal case because __CHECKER__ is defined only in the context of sparse. This commit also renames rcu_dereference_sparse() to rcu_check_parse() since the checking now happens not only during derereferencing but also during assignment. Test: Introduced an rcu_assign_pointer in code and checked the output of sparse with and without this change. The change correctly causes sparse to throw an error. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6f8f047c4068..4a2cce4d4bd9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -309,16 +309,16 @@ static inline void rcu_preempt_sleep_check(void) { } */ #ifdef __CHECKER__ -#define rcu_dereference_sparse(p, space) \ +#define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ -#define rcu_dereference_sparse(p, space) +#define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_check(p, c, space) \ @@ -326,13 +326,13 @@ static inline void rcu_preempt_sleep_check(void) { } /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ - rcu_dereference_sparse(p, space); \ + rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define rcu_dereference_raw(p) \ @@ -382,6 +382,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_assign_pointer(p, v) \ ({ \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ + rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ @@ -785,7 +786,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) */ #define RCU_INIT_POINTER(p, v) \ do { \ - rcu_dereference_sparse(p, __rcu); \ + rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) -- cgit v1.2.3 From c8ca1aa774b20f182733d1661f3b6aa3105338e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Nov 2018 10:06:46 -0800 Subject: srcu: Check for invalid idx argument in srcu_read_unlock() The current SRCU implementation has an idx argument of zero or one, and never anything else. This commit therefore adds a WARN_ON_ONCE() to complain if this restriction is violated. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c614375cd264..33cf83b9bda8 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -223,6 +223,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { + WARN_ON_ONCE(idx & ~0x1); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } -- cgit v1.2.3 From e81baf4cb19a9b428ba477fd0423f81672a58817 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 11 Dec 2018 12:12:38 +0100 Subject: srcu: Remove srcu_queue_delayed_work_on() srcu_queue_delayed_work_on() disables preemption (and therefore CPU hotplug in RCU's case) and then checks based on its own accounting if a CPU is online. If the CPU is online it uses queue_delayed_work_on() otherwise it fallbacks to queue_delayed_work(). The problem here is that queue_work() on -RT does not work with disabled preemption. queue_work_on() works also on an offlined CPU. queue_delayed_work_on() has the problem that it is possible to program a timer on an offlined CPU. This timer will fire once the CPU is online again. But until then, the timer remains programmed and nothing will happen. Add a local timer which will fire (as requested per delay) on the local CPU and then enqueue the work on the specific CPU. RCUtorture testing with SRCU-P for 24h showed no problems. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 3 ++- kernel/rcu/srcutree.c | 55 +++++++++++++++++++++--------------------------- kernel/rcu/tree.c | 4 ---- kernel/rcu/tree.h | 8 ------- 4 files changed, 26 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 6f292bd3e7db..0faa978c9880 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -45,7 +45,8 @@ struct srcu_data { unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ bool srcu_cblist_invoking; /* Invoking these CBs? */ - struct delayed_work work; /* Context for CB invoking. */ + struct timer_list delay_work; /* Delay for CB invoking */ + struct work_struct work; /* Context for CB invoking. */ struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ struct srcu_node *mynode; /* Leaf srcu_node. */ unsigned long grpmask; /* Mask for leaf srcu_node */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 3600d88d8956..7f041f2435df 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); +static void srcu_delay_timer(struct timer_list *t); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ #define spin_lock_rcu_node(p) \ @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) snp->grphi = cpu; } sdp->cpu = cpu; - INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); + INIT_WORK(&sdp->work, srcu_invoke_callbacks); + timer_setup(&sdp->delay_work, srcu_delay_timer, 0); sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) } else { flush_delayed_work(&ssp->work); } - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); + if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) + if (WARN_ON(timer_pending(&sdp->delay_work))) + return; /* Just leak it! */ + if (WARN_ON(work_pending(&sdp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); + del_timer_sync(&sdp->delay_work); + flush_work(&sdp->work); } + } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp) WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } -/* - * Track online CPUs to guide callback workqueue placement. - */ -DEFINE_PER_CPU(bool, srcu_online); -void srcu_online_cpu(unsigned int cpu) +static void srcu_delay_timer(struct timer_list *t) { - WRITE_ONCE(per_cpu(srcu_online, cpu), true); -} + struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work); -void srcu_offline_cpu(unsigned int cpu) -{ - WRITE_ONCE(per_cpu(srcu_online, cpu), false); + queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); } -/* - * Place the workqueue handler on the specified CPU if online, otherwise - * just run it whereever. This is useful for placing workqueue handlers - * that are to invoke the specified CPU's callbacks. - */ -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct delayed_work *dwork, +static void srcu_queue_delayed_work_on(struct srcu_data *sdp, unsigned long delay) { - bool ret; + if (!delay) { + queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); + return; + } - preempt_disable(); - if (READ_ONCE(per_cpu(srcu_online, cpu))) - ret = queue_delayed_work_on(cpu, wq, dwork, delay); - else - ret = queue_delayed_work(wq, dwork, delay); - preempt_enable(); - return ret; + timer_reduce(&sdp->delay_work, jiffies + delay); } /* @@ -504,7 +496,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, */ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) { - srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay); + srcu_queue_delayed_work_on(sdp, delay); } /* @@ -1186,7 +1178,8 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct srcu_data *sdp; struct srcu_struct *ssp; - sdp = container_of(work, struct srcu_data, work.work); + sdp = container_of(work, struct srcu_data, work); + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1c4add096078..127255795859 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3408,8 +3408,6 @@ int rcutree_online_cpu(unsigned int cpu) raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->ffmask |= rdp->grpmask; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_online_cpu(cpu); if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return 0; /* Too early in boot for scheduler work. */ sync_sched_exp_online_cleanup(cpu); @@ -3434,8 +3432,6 @@ int rcutree_offline_cpu(unsigned int cpu) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); rcutree_affinity_setting(cpu, cpu); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_offline_cpu(cpu); return 0; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 149557b7c39c..4bba017c703c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -458,11 +458,3 @@ static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); - -#ifdef CONFIG_SRCU -void srcu_online_cpu(unsigned int cpu); -void srcu_offline_cpu(unsigned int cpu); -#else /* #ifdef CONFIG_SRCU */ -void srcu_online_cpu(unsigned int cpu) { } -void srcu_offline_cpu(unsigned int cpu) { } -#endif /* #else #ifdef CONFIG_SRCU */ -- cgit v1.2.3 From 3a6cb58f159e64241b2af9374acad41a70939349 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Dec 2018 09:44:52 -0800 Subject: rcutorture: Add grace period after CPU offline Beyond a certain point in the CPU-hotplug offline process, timers get stranded on the outgoing CPU, and won't fire until that CPU comes back online, which might well be never. This commit therefore adds a hook in torture_onoff_init() that is invoked from torture_offline(), which rcutorture uses to occasionally wait for a grace period. This should result in failures for RCU implementations that rely on stranded timers eventually firing in the absence of the CPU coming back online. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 3 ++- kernel/locking/locktorture.c | 2 +- kernel/rcu/rcutorture.c | 11 ++++++++++- kernel/torture.c | 6 +++++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 48fad21109fc..f2d3bcbf4337 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -50,11 +50,12 @@ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0) /* Definitions for online/offline exerciser. */ +typedef void torture_ofl_func(void); bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes, unsigned long *sum_offl, int *min_onl, int *max_onl); bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes, unsigned long *sum_onl, int *min_onl, int *max_onl); -int torture_onoff_init(long ooholdoff, long oointerval); +int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f); void torture_onoff_stats(void); bool torture_onoff_failures(void); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 7d0b0ed74404..c8b348097bb5 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -970,7 +970,7 @@ static int __init lock_torture_init(void) /* Prepare torture context. */ if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, - onoff_interval * HZ); + onoff_interval * HZ, NULL); if (firsterr) goto unwind; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0955f3a20952..9eb9235c1ec9 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2243,6 +2243,14 @@ static void rcu_test_debug_objects(void) #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ } +static void rcutorture_sync(void) +{ + static unsigned long n; + + if (cur_ops->sync && !(++n & 0xfff)) + cur_ops->sync(); +} + static int __init rcu_torture_init(void) { @@ -2404,7 +2412,8 @@ rcu_torture_init(void) firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); if (firsterr) goto unwind; - firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval); + firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, + rcutorture_sync); if (firsterr) goto unwind; firsterr = rcu_torture_stall_init(); diff --git a/kernel/torture.c b/kernel/torture.c index bbf6d473e50c..a03ff722352b 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -75,6 +75,7 @@ static DEFINE_MUTEX(fullstop_mutex); static struct task_struct *onoff_task; static long onoff_holdoff; static long onoff_interval; +static torture_ofl_func *onoff_f; static long n_offline_attempts; static long n_offline_successes; static unsigned long sum_offline; @@ -118,6 +119,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, pr_alert("%s" TORTURE_FLAG "torture_onoff task: offlined %d\n", torture_type, cpu); + if (onoff_f) + onoff_f(); (*n_offl_successes)++; delta = jiffies - starttime; *sum_offl += delta; @@ -243,11 +246,12 @@ stop: /* * Initiate online-offline handling. */ -int torture_onoff_init(long ooholdoff, long oointerval) +int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f) { #ifdef CONFIG_HOTPLUG_CPU onoff_holdoff = ooholdoff; onoff_interval = oointerval; + onoff_f = f; if (onoff_interval <= 0) return 0; return torture_create_kthread(torture_onoff, NULL, onoff_task); -- cgit v1.2.3 From 9b28aa1d0eae1be1016c8f4ba504545caff01da3 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 12 Dec 2018 23:59:13 +0000 Subject: platform_data/mlxreg: Document fixes for core platform data Remove "led" from the description, since the structure "mlxreg_core_platform_data" is used not only for led data. Signed-off-by: Vadim Pasternak Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 19f5cb618c55..d823713f94ec 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -107,9 +107,9 @@ struct mlxreg_core_item { /** * struct mlxreg_core_platform_data - platform data: * - * @led_data: led private data; + * @data: instance private data; * @regmap: register map of parent device; - * @counter: number of led instances; + * @counter: number of instances; */ struct mlxreg_core_platform_data { struct mlxreg_core_data *data; -- cgit v1.2.3 From 946e4e02b11889cb161b15ff4712a8ba21a50eb6 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 12 Dec 2018 23:59:14 +0000 Subject: platform_data/mlxreg: Add capability field to core platform data Add capability field to "mlxreg_core_platform_data" structure. The purpose of this register is to provide additional info to platform driver through the atribute related capability register. Signed-off-by: Vadim Pasternak Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index d823713f94ec..1b2f86f96743 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -61,6 +61,7 @@ struct mlxreg_hotplug_device { * @reg: attribute register; * @mask: attribute access mask; * @bit: attribute effective bit; + * @capability: attribute capability register; * @mode: access mode; * @np - pointer to node platform associated with attribute; * @hpdev - hotplug device data; @@ -72,6 +73,7 @@ struct mlxreg_core_data { u32 reg; u32 mask; u32 bit; + u32 capability; umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; -- cgit v1.2.3 From a7b76c8857692b0fce063b94ed83da11c396d341 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:05 -0500 Subject: bpf: JIT blinds support JMP32 This patch adds JIT blinds support for JMP32. Like BPF_JMP_REG/IMM, JMP32 version are needed for building raw bpf insn. They are added to both include/linux/filter.h and tools/include/linux/filter.h. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 20 ++++++++++++++++++++ kernel/bpf/core.c | 21 +++++++++++++++++++++ tools/include/linux/filter.h | 20 ++++++++++++++++++++ 3 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index be9af6b4a9e4..e4b473f85b46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,26 @@ struct sock_reuseport; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bba11c2565ee..a7bcb23bee84 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -949,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, + off); + break; + case BPF_LD | BPF_IMM | BPF_DW: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index af55acf73e75..cce0b02c0e28 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -199,6 +199,16 @@ .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -209,6 +219,16 @@ .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ -- cgit v1.2.3 From 8d5d0cfb63cbcb4005e19a332b31d687b1d01e58 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 3 Dec 2018 09:56:23 +0000 Subject: sched/topology: Introduce a sysctl for Energy Aware Scheduling In its current state, Energy Aware Scheduling (EAS) starts automatically on asymmetric platforms having an Energy Model (EM). However, there are users who want to have an EM (for thermal management for example), but don't want EAS with it. In order to let users disable EAS explicitly, introduce a new sysctl called 'sched_energy_aware'. It is enabled by default so that EAS can start automatically on platforms where it makes sense. Flipping it to 0 rebuilds the scheduling domains and disables EAS. Signed-off-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adharmap@codeaurora.org Cc: chris.redpath@arm.com Cc: currojerez@riseup.net Cc: dietmar.eggemann@arm.com Cc: edubezval@gmail.com Cc: gregkh@linuxfoundation.org Cc: javi.merino@kernel.org Cc: joel@joelfernandes.org Cc: juri.lelli@redhat.com Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: pkondeti@codeaurora.org Cc: rjw@rjwysocki.net Cc: skannan@codeaurora.org Cc: smuckle@google.com Cc: srinivas.pandruvada@linux.intel.com Cc: thara.gopinath@linaro.org Cc: tkjos@google.com Cc: valentin.schneider@arm.com Cc: vincent.guittot@linaro.org Cc: viresh.kumar@linaro.org Link: https://lkml.kernel.org/r/20181203095628.11858-11-quentin.perret@arm.com Signed-off-by: Ingo Molnar --- Documentation/sysctl/kernel.txt | 12 ++++++++++++ include/linux/sched/sysctl.h | 7 +++++++ kernel/sched/topology.c | 29 +++++++++++++++++++++++++++++ kernel/sysctl.c | 11 +++++++++++ 4 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index c0527d8a468a..379063e58326 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -79,6 +79,7 @@ show up in /proc/sys/kernel: - reboot-cmd [ SPARC only ] - rtsig-max - rtsig-nr +- sched_energy_aware - seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst - sem - sem_next_id [ sysv ipc ] @@ -890,6 +891,17 @@ rtsig-nr shows the number of RT signals currently queued. ============================================================== +sched_energy_aware: + +Enables/disables Energy Aware Scheduling (EAS). EAS starts +automatically on platforms where it can run (that is, +platforms with asymmetric CPU topologies and having an Energy +Model available). If your platform happens to meet the +requirements for EAS but you do not want to use it, change +this value to 0. + +============================================================== + sched_schedstats: Enables/disables scheduler statistics. Enabling this feature diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index a9c32daeb9d8..99ce6d728df7 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -83,4 +83,11 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +extern unsigned int sysctl_sched_energy_aware; +extern int sched_energy_aware_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 3f35ba1d8fde..50c3fc316c54 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -203,9 +203,35 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) DEFINE_STATIC_KEY_FALSE(sched_energy_present); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +unsigned int sysctl_sched_energy_aware = 1; DEFINE_MUTEX(sched_energy_mutex); bool sched_energy_update; +#ifdef CONFIG_PROC_SYSCTL +int sched_energy_aware_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, state; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!ret && write) { + state = static_branch_unlikely(&sched_energy_present); + if (state != sysctl_sched_energy_aware) { + mutex_lock(&sched_energy_mutex); + sched_energy_update = 1; + rebuild_sched_domains(); + sched_energy_update = 0; + mutex_unlock(&sched_energy_mutex); + } + } + + return ret; +} +#endif + static void free_pd(struct perf_domain *pd) { struct perf_domain *tmp; @@ -322,6 +348,9 @@ static bool build_perf_domains(const struct cpumask *cpu_map) struct cpufreq_policy *policy; struct cpufreq_governor *gov; + if (!sysctl_sched_energy_aware) + goto free; + /* EAS is enabled for asymmetric CPU capacity topologies. */ if (!per_cpu(sd_asym_cpucapacity, cpu)) { if (sched_debug()) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..987ae08147bf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -467,6 +467,17 @@ static struct ctl_table kern_table[] = { .extra1 = &one, }, #endif +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) + { + .procname = "sched_energy_aware", + .data = &sysctl_sched_energy_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_energy_aware_handler, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- cgit v1.2.3 From fdce60787f6215607dc7ac910cbaf4416684b589 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 13 Dec 2018 12:22:32 +0100 Subject: reset: sunxi: declare sun6i_reset_init in a header file Avoid declaring extern functions in c files. To make sure function definition and usage don't get out of sync, declare sun6i_reset_init in a common header. Suggested-by: Stephen Rothwell Signed-off-by: Philipp Zabel --- arch/arm/mach-sunxi/sunxi.c | 2 +- drivers/reset/reset-sunxi.c | 1 + include/linux/reset/sunxi.h | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 include/linux/reset/sunxi.h (limited to 'include/linux') diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 8a7f301839c2..933b6930f024 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,6 @@ static const char * const sun6i_board_dt_compat[] = { NULL, }; -extern void __init sun6i_reset_init(void); static void __init sun6i_timer_init(void) { of_clk_init(NULL); diff --git a/drivers/reset/reset-sunxi.c b/drivers/reset/reset-sunxi.c index db9a1a75523f..b06d724d8f21 100644 --- a/drivers/reset/reset-sunxi.c +++ b/drivers/reset/reset-sunxi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/reset/sunxi.h b/include/linux/reset/sunxi.h new file mode 100644 index 000000000000..1ad7fffb413e --- /dev/null +++ b/include/linux/reset/sunxi.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_RESET_SUNXI_H__ +#define __LINUX_RESET_SUNXI_H__ + +void __init sun6i_reset_init(void); + +#endif /* __LINUX_RESET_SUNXI_H__ */ -- cgit v1.2.3 From cdbeb315ed8dcc142a68054899cedd6e4f1fea3f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 13 Dec 2018 12:24:36 +0100 Subject: reset: socfpga: declare socfpga_reset_init in a header file Avoid declaring extern functions in c files. To make sure function definition and usage don't get out of sync, declare socfpga_reset_init in a common header. Suggested-by: Stephen Rothwell Signed-off-by: Philipp Zabel Acked-by: Dinh Nguyen --- arch/arm/mach-socfpga/socfpga.c | 3 +-- drivers/reset/reset-socfpga.c | 2 +- include/linux/reset/socfpga.h | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 include/linux/reset/socfpga.h (limited to 'include/linux') diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index afd98971d903..816da0eb6616 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -32,8 +33,6 @@ void __iomem *rst_manager_base_addr; void __iomem *sdr_ctl_base_addr; unsigned long socfpga_cpu1start_addr; -extern void __init socfpga_reset_init(void); - static void __init socfpga_sysmgr_init(void) { struct device_node *np; diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 318cfc51c441..96953992c2bb 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include "reset-simple.h" #define SOCFPGA_NR_BANKS 8 -void __init socfpga_reset_init(void); static int a10_reset_init(struct device_node *np) { diff --git a/include/linux/reset/socfpga.h b/include/linux/reset/socfpga.h new file mode 100644 index 000000000000..b11a2047c342 --- /dev/null +++ b/include/linux/reset/socfpga.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_RESET_SOCFPGA_H__ +#define __LINUX_RESET_SOCFPGA_H__ + +void __init socfpga_reset_init(void); + +#endif /* __LINUX_RESET_SOCFPGA_H__ */ -- cgit v1.2.3 From 83f529281d7aa42b10c2c5cb64fcbd2c7cab4409 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 27 Jan 2019 19:18:57 +0100 Subject: netfilter: ipv4: remove useless export_symbol Only one caller; place it where needed and get rid of the EXPORT_SYMBOL. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 6 ------ net/ipv4/netfilter.c | 18 ------------------ net/netfilter/utils.c | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..082e2c41b7ff 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -25,7 +25,6 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); #else static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) @@ -37,11 +36,6 @@ static inline int nf_ip_route(struct net *net, struct dst_entry **dst, { return -EOPNOTSUPP; } -static inline int nf_ip_reroute(struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - return -EOPNOTSUPP; -} #endif /* CONFIG_INET */ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..a058213b77a7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,24 +80,6 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) -{ - const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - if (!(iph->tos == rt_info->tos && - skb->mark == rt_info->mark && - iph->daddr == rt_info->daddr && - iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_ip_reroute); - int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict __always_unused) { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index e8da9a9bba73..55af9f247993 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -180,6 +180,25 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, } EXPORT_SYMBOL_GPL(nf_route); +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) +{ +#ifdef CONFIG_INET + const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) + return ip_route_me_harder(entry->state.net, skb, + RTN_UNSPEC); + } +#endif + return 0; +} + int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { const struct nf_ipv6_ops *v6ops; -- cgit v1.2.3 From 87eff9af7efb154cc4a940ed12efc803a0bf3fba Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 22 Jan 2019 23:18:21 +0200 Subject: pinctrl: remove pinctrl/machine.h inclusion from pinctrl/pinconf.h The change adds explicit inclusion of linux/pinctrl/machine.h header to the only needed pinctrl-madera-core.c file, and therefore inclusion of pinctrl/machine.h header from pinctrl/pinconf.h can be removed. The change is preparatory to a follow-up reversal of commit f07512e615dd ("pinctrl/pinconfig: add debug interface"). Signed-off-by: Vladimir Zapolskiy Cc: Charles Keepax Reviewed-by Richard Fitzgerald Signed-off-by: Linus Walleij --- drivers/pinctrl/cirrus/pinctrl-madera-core.c | 1 + include/linux/pinctrl/pinconf.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c index a5dda832024a..7c9694593f79 100644 --- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c +++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 8dd85d302b90..109468d9d849 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -14,8 +14,6 @@ #ifdef CONFIG_PINCONF -#include - struct pinctrl_dev; struct seq_file; -- cgit v1.2.3 From e73339037f6b6d65e84f5fd42e56dd3cdf0d9e9c Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 22 Jan 2019 23:18:22 +0200 Subject: pinctrl: remove unused 'pinconf-config' debugfs interface The main goal of the change is to remove .pin_config_dbg_parse_modify callback before a driver with its support appears. So far the in-kernel interface did not attract any users since its introduction 5 years ago. Originally .pin_config_dbg_parse_modify callback and the associated 'pinconf-config' debugfs file were introduced in commit f07512e615dd ("pinctrl/pinconfig: add debug interface"), a short description of 'pinconf-config' usage for debugging can be expressed this way: Write to 'pinconf-config' (see pinconf_dbg_config_write() function): % echo -n modify $map_type $device_name $state_name $pin_name $config > \ /sys/kernel/debug/pinctrl/$pinctrl/pinconf-config It supposes to update a global (therefore single!) 'pinconf_dbg_conf' variable with an alternative setting, the arguments should match an existing pinconf device and some registered pinctrl mapping 'map': * $map_type is either 'config_pin' or 'config_group', it should match 'map->type' value of PIN_MAP_TYPE_CONFIGS_PIN or PIN_MAP_TYPE_CONFIGS_GROUP accordingly, * $device_name should match 'map->dev_name' string value, * $state_name should match 'map->name' string value, * $pin_name should match 'map->data.configs.group_or_pin' string value, If all above has matched, then $config is a new value to be set by calling pinconfops->pin_config_dbg_parse_modify(pctldev, config, matched_config). After a successful write into 'pinconf-config' a user can read the file to get information about that single modified pin configuration. The fact is .pin_config_dbg_parse_modify callback has never been defined in 'struct pinconf_ops' of any pinconf driver, thus an actual modification of a pin or group state on any present pinconf controller does not happen, and it declares that all related code is no more than dead code. I discovered the issue while attempting to add .pin_config_dbg_parse_modify support in some drivers and found that too short 'MAX_NAME_LEN' set by drivers/pinctrl/pinconf.c:372:#define MAX_NAME_LEN 15 is practically insufficient to store a regular pinctrl device name, which are like 'e6060000.pin-controller-sh-pfc' or pin names like 'MX6QDL_PAD_ENET_REF_CLK', thus it is another indicator that the code is barely usable, insufficiently tested and unprepossessing. Of course it might be possible to increase MAX_NAME_LEN, and then add .pin_config_dbg_parse_modify callbacks to the drivers, but the whole idea of such a limited debug option looks inviable. A more flexible way to functionally substitute the original approach is to implicitly or explicitly use pinctrl_select_state() function whenever needed. Signed-off-by: Vladimir Zapolskiy Cc: Laurent Meunier Cc: Masahiro Yamada Cc: Russell King Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf.c | 222 ---------------------------------------- include/linux/pinctrl/pinconf.h | 4 - 2 files changed, 226 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index 2c7229380f08..2678603df14b 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -369,225 +368,6 @@ static int pinconf_groups_show(struct seq_file *s, void *what) DEFINE_SHOW_ATTRIBUTE(pinconf_pins); DEFINE_SHOW_ATTRIBUTE(pinconf_groups); -#define MAX_NAME_LEN 15 - -struct dbg_cfg { - enum pinctrl_map_type map_type; - char dev_name[MAX_NAME_LEN + 1]; - char state_name[MAX_NAME_LEN + 1]; - char pin_name[MAX_NAME_LEN + 1]; -}; - -/* - * Goal is to keep this structure as global in order to simply read the - * pinconf-config file after a write to check config is as expected - */ -static struct dbg_cfg pinconf_dbg_conf; - -/** - * pinconf_dbg_config_print() - display the pinctrl config from the pinctrl - * map, of the dev/pin/state that was last written to pinconf-config file. - * @s: string filled in with config description - * @d: not used - */ -static int pinconf_dbg_config_print(struct seq_file *s, void *d) -{ - struct pinctrl_maps *maps_node; - const struct pinctrl_map *map; - const struct pinctrl_map *found = NULL; - struct pinctrl_dev *pctldev; - struct dbg_cfg *dbg = &pinconf_dbg_conf; - int i; - - mutex_lock(&pinctrl_maps_mutex); - - /* Parse the pinctrl map and look for the elected pin/state */ - for_each_maps(maps_node, i, map) { - if (map->type != dbg->map_type) - continue; - if (strcmp(map->dev_name, dbg->dev_name)) - continue; - if (strcmp(map->name, dbg->state_name)) - continue; - - if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) { - /* We found the right pin */ - found = map; - break; - } - } - - if (!found) { - seq_printf(s, "No config found for dev/state/pin, expected:\n"); - seq_printf(s, "Searched dev:%s\n", dbg->dev_name); - seq_printf(s, "Searched state:%s\n", dbg->state_name); - seq_printf(s, "Searched pin:%s\n", dbg->pin_name); - seq_printf(s, "Use: modify config_pin "\ - " \n"); - goto exit; - } - - pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name); - seq_printf(s, "Dev %s has config of %s in state %s:\n", - dbg->dev_name, dbg->pin_name, dbg->state_name); - pinconf_show_config(s, pctldev, found->data.configs.configs, - found->data.configs.num_configs); - -exit: - mutex_unlock(&pinctrl_maps_mutex); - - return 0; -} - -/** - * pinconf_dbg_config_write() - modify the pinctrl config in the pinctrl - * map, of a dev/pin/state entry based on user entries to pinconf-config - * @user_buf: contains the modification request with expected format: - * modify - * modify is literal string, alternatives like add/delete not supported yet - * is the configuration to be changed. Supported configs are - * "config_pin" or "config_group", alternatives like config_mux are not - * supported yet. - * are values that should match the pinctrl-maps - * reflects the new config and is driver dependent - */ -static ssize_t pinconf_dbg_config_write(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - struct pinctrl_maps *maps_node; - const struct pinctrl_map *map; - const struct pinctrl_map *found = NULL; - struct pinctrl_dev *pctldev; - const struct pinconf_ops *confops = NULL; - struct dbg_cfg *dbg = &pinconf_dbg_conf; - const struct pinctrl_map_configs *configs; - char config[MAX_NAME_LEN + 1]; - char buf[128]; - char *b = &buf[0]; - int buf_size; - char *token; - int i; - - /* Get userspace string and assure termination */ - buf_size = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - buf[buf_size] = 0; - - /* - * need to parse entry and extract parameters: - * modify configs_pin devicename state pinname newvalue - */ - - /* Get arg: 'modify' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strcmp(token, "modify")) - return -EINVAL; - - /* - * Get arg type: "config_pin" and "config_group" - * types are supported so far - */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (!strcmp(token, "config_pin")) - dbg->map_type = PIN_MAP_TYPE_CONFIGS_PIN; - else if (!strcmp(token, "config_group")) - dbg->map_type = PIN_MAP_TYPE_CONFIGS_GROUP; - else - return -EINVAL; - - /* get arg 'device_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->dev_name, token, MAX_NAME_LEN); - - /* get arg 'state_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->state_name, token, MAX_NAME_LEN); - - /* get arg 'pin_name' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(dbg->pin_name, token, MAX_NAME_LEN); - - /* get new_value of config' */ - token = strsep(&b, " "); - if (!token) - return -EINVAL; - if (strlen(token) >= MAX_NAME_LEN) - return -EINVAL; - strncpy(config, token, MAX_NAME_LEN); - - mutex_lock(&pinctrl_maps_mutex); - - /* Parse the pinctrl map and look for the selected dev/state/pin */ - for_each_maps(maps_node, i, map) { - if (strcmp(map->dev_name, dbg->dev_name)) - continue; - if (map->type != dbg->map_type) - continue; - if (strcmp(map->name, dbg->state_name)) - continue; - - /* we found the right pin / state, so overwrite config */ - if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) { - found = map; - break; - } - } - - if (!found) { - count = -EINVAL; - goto exit; - } - - pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name); - if (pctldev) - confops = pctldev->desc->confops; - - if (confops && confops->pin_config_dbg_parse_modify) { - configs = &found->data.configs; - for (i = 0; i < configs->num_configs; i++) { - confops->pin_config_dbg_parse_modify(pctldev, - config, - &configs->configs[i]); - } - } - -exit: - mutex_unlock(&pinctrl_maps_mutex); - - return count; -} - -static int pinconf_dbg_config_open(struct inode *inode, struct file *file) -{ - return single_open(file, pinconf_dbg_config_print, inode->i_private); -} - -static const struct file_operations pinconf_dbg_pinconfig_fops = { - .open = pinconf_dbg_config_open, - .write = pinconf_dbg_config_write, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - void pinconf_init_device_debugfs(struct dentry *devroot, struct pinctrl_dev *pctldev) { @@ -595,8 +375,6 @@ void pinconf_init_device_debugfs(struct dentry *devroot, devroot, pctldev, &pinconf_pins_fops); debugfs_create_file("pinconf-groups", S_IFREG | S_IRUGO, devroot, pctldev, &pinconf_groups_fops); - debugfs_create_file("pinconf-config", (S_IRUGO | S_IWUSR | S_IWGRP), - devroot, pctldev, &pinconf_dbg_pinconfig_fops); } #endif diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index 109468d9d849..93c9dd133e9d 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -29,7 +29,6 @@ struct seq_file; * @pin_config_group_get: get configurations for an entire pin group; should * return -ENOTSUPP and -EINVAL using the same rules as pin_config_get. * @pin_config_group_set: configure all pins in a group - * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration * @pin_config_dbg_show: optional debugfs display hook that will provide * per-device info for a certain pin in debugfs * @pin_config_group_dbg_show: optional debugfs display hook that will provide @@ -55,9 +54,6 @@ struct pinconf_ops { unsigned selector, unsigned long *configs, unsigned num_configs); - int (*pin_config_dbg_parse_modify) (struct pinctrl_dev *pctldev, - const char *arg, - unsigned long *config); void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, unsigned offset); -- cgit v1.2.3 From 64515dc899df898991b2b7e56f69f56f014ea888 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 28 Jan 2019 19:27:55 +0200 Subject: qed: Add infrastructure for error detection and recovery This patch adds the detection and handling of a parity error ("process kill event"), including the update of the protocol drivers, and the prevention of any HW access that will lead to device access towards the host while recovery is in progress. It also provides the means for the protocol drivers to trigger a recovery process on their decision. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 4 ++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 41 +++++++---- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_hw.c | 11 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 30 ++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 94 ++++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 32 +++++++++ drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 2 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 22 ++++++ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 9 ++- include/linux/qed/qed_if.h | 20 ++++++ 11 files changed, 251 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index b352e313e1f6..3b0955d34716 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -804,6 +804,9 @@ struct qed_dev { u32 mcp_nvm_resp; + /* Recovery */ + bool recov_in_prog; + /* Linux specific here */ struct qede_dev *edev; struct pci_dev *pdev; @@ -943,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); +void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); void qed_get_protocol_stats(struct qed_dev *cdev, enum qed_mcp_protocol_type type, union qed_mcp_protocol_stats *stats); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index fa5f07e65672..b17003d9066c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -2140,6 +2140,11 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) "Load request was sent. Load code: 0x%x\n", load_code); + /* Only relevant for recovery: + * Clear the indication after LOAD_REQ is responded by the MFW. + */ + cdev->recov_in_prog = false; + qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt); qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt); @@ -2291,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev, qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0); qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0); + if (cdev->recov_in_prog) + return; + for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { if ((!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_CONN)) && @@ -2353,12 +2361,14 @@ int qed_hw_stop(struct qed_dev *cdev) p_hwfn->hw_init_done = false; /* Send unload command to MCP */ - rc = qed_mcp_unload_req(p_hwfn, p_ptt); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed sending a UNLOAD_REQ command. rc = %d.\n", - rc); - rc2 = -EINVAL; + if (!cdev->recov_in_prog) { + rc = qed_mcp_unload_req(p_hwfn, p_ptt); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed sending a UNLOAD_REQ command. rc = %d.\n", + rc); + rc2 = -EINVAL; + } } qed_slowpath_irq_sync(p_hwfn); @@ -2400,16 +2410,18 @@ int qed_hw_stop(struct qed_dev *cdev) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0); qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0); - qed_mcp_unload_done(p_hwfn, p_ptt); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed sending a UNLOAD_DONE command. rc = %d.\n", - rc); - rc2 = -EINVAL; + if (!cdev->recov_in_prog) { + rc = qed_mcp_unload_done(p_hwfn, p_ptt); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed sending a UNLOAD_DONE command. rc = %d.\n", + rc); + rc2 = -EINVAL; + } } } - if (IS_PF(cdev)) { + if (IS_PF(cdev) && !cdev->recov_in_prog) { p_hwfn = QED_LEADING_HWFN(cdev); p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt; @@ -3459,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, void __iomem *p_doorbells, enum qed_pci_personality personality) { + struct qed_dev *cdev = p_hwfn->cdev; int rc = 0; /* Split PCI bars evenly between hwfns */ @@ -3511,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Sending a mailbox to the MFW should be done after qed_get_hw_info() * is called as it sets the ports number in an engine. */ - if (IS_LEAD_HWFN(p_hwfn)) { + if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) { rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt); if (rc) DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b13cfb449d8f..417121e74ee9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_LLDP_DATA_UPDATED, MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED, MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED, - MFW_DRV_MSG_RESERVED4, + MFW_DRV_MSG_ERROR_RECOVERY, MFW_DRV_MSG_BW_UPDATE, MFW_DRV_MSG_S_TAG_UPDATE, MFW_DRV_MSG_GET_LAN_STATS, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 70504dcf4087..72ec1c6bdf70 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, int qed_status = 0; u32 offset = 0; + if (p_hwfn->cdev->recov_in_prog) { + DP_VERBOSE(p_hwfn, + NETIF_MSG_HW, + "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n", + src_addr, src_type, dst_addr, dst_type, + size_in_dwords); + + /* Let the flow complete w/o any error handling */ + return 0; + } + qed_dmae_opcode(p_hwfn, (src_type == QED_DMAE_ADDRESS_GRC), (dst_type == QED_DMAE_ADDRESS_GRC), diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6adf5bda9811..b47352643fb5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, qed_init_dp(cdev, params->dp_module, params->dp_level); + cdev->recov_in_prog = params->recov_in_prog; + rc = qed_init_pci(cdev, pdev); if (rc) { DP_ERR(cdev, "init pci failed\n"); @@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type, return qed_mcp_get_nvm_image(hwfn, type, buf, len); } +void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn) +{ + struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common; + void *cookie = p_hwfn->cdev->ops_cookie; + + if (ops && ops->schedule_recovery_handler) + ops->schedule_recovery_handler(cookie); +} + static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, void *handle) { @@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +static int qed_recovery_process(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt; + int rc = 0; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EAGAIN; + + rc = qed_start_recovery_process(p_hwfn, p_ptt); + + qed_ptt_release(p_hwfn, p_ptt); + + return rc; +} + static int qed_update_wol(struct qed_dev *cdev, bool enabled) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); @@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = { .nvm_get_image = &qed_nvm_get_image, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, + .recovery_process = &qed_recovery_process, + .recovery_prolog = &qed_recovery_prolog, .update_drv_state = &qed_update_drv_state, .update_mac = &qed_update_mac, .update_mtu = &qed_update_mtu, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 1024484d7dd8..bb8541847aa5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1549,6 +1549,60 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) return 0; } +u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, + PUBLIC_PATH); + path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr); + path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn)); + + proc_kill_cnt = qed_rd(p_hwfn, p_ptt, + path_addr + + offsetof(struct public_path, process_kill)) & + PROCESS_KILL_COUNTER_MASK; + + return proc_kill_cnt; +} + +static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + u32 proc_kill_cnt; + + /* Prevent possible attentions/interrupts during the recovery handling + * and till its load phase, during which they will be re-enabled. + */ + qed_int_igu_disable_int(p_hwfn, p_ptt); + + DP_NOTICE(p_hwfn, "Received a process kill indication\n"); + + /* The following operations should be done once, and thus in CMT mode + * are carried out by only the first HW function. + */ + if (p_hwfn != QED_LEADING_HWFN(cdev)) + return; + + if (cdev->recov_in_prog) { + DP_NOTICE(p_hwfn, + "Ignoring the indication since a recovery process is already in progress\n"); + return; + } + + cdev->recov_in_prog = true; + + proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt); + DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt); + + qed_schedule_recovery_handler(p_hwfn); +} + static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum MFW_DRV_MSG_TYPE type) @@ -1779,6 +1833,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE: qed_mcp_handle_transceiver_change(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_ERROR_RECOVERY: + qed_mcp_handle_process_kill(p_hwfn, p_ptt); + break; case MFW_DRV_MSG_GET_LAN_STATS: case MFW_DRV_MSG_GET_FCOE_STATS: case MFW_DRV_MSG_GET_ISCSI_STATS: @@ -2324,6 +2381,43 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, return 0; } +int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + + if (cdev->recov_in_prog) { + DP_NOTICE(p_hwfn, + "Avoid triggering a recovery since such a process is already in progress\n"); + return -EAGAIN; + } + + DP_NOTICE(p_hwfn, "Triggering a recovery process\n"); + qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1); + + return 0; +} + +#define QED_RECOVERY_PROLOG_SLEEP_MS 100 + +int qed_recovery_prolog(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_main_ptt; + int rc; + + /* Allow ongoing PCIe transactions to complete */ + msleep(QED_RECOVERY_PROLOG_SLEEP_MS); + + /* Clear the PF's internal FID_enable in the PXP */ + rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false); + if (rc) + DP_NOTICE(p_hwfn, + "qed_pglueb_set_pfid_enable() failed. rc = %d.\n", + rc); + + return rc; +} + static int qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 vf_id, u8 num) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 387c5e649136..6e1d72a669ae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -440,6 +440,38 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Read the MFW process kill counter + * + * @param p_hwfn + * @param p_ptt + * + * @return u32 + */ +u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +/** + * @brief Trigger a recovery process + * + * @param p_hwfn + * @param p_ptt + * + * @return int + */ +int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief A recovery handler must call this function as its first step. + * It is assumed that the handler is not run from an interrupt context. + * + * @param cdev + * @param p_ptt + * + * @return int + */ +int qed_recovery_prolog(struct qed_dev *cdev); + /** * @brief Notify MFW about the change in base device properties * diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 8939ed6e08b7..5ce825ca5f24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -518,6 +518,8 @@ 0x180824UL #define MISC_REG_AEU_GENERAL_ATTN_0 \ 0x008400UL +#define MISC_REG_AEU_GENERAL_ATTN_35 \ + 0x00848cUL #define CAU_REG_SB_ADDR_MEMORY \ 0x1c8000UL #define CAU_REG_SB_VAR_MEMORY \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index eb88bbc6b193..3e0f7c46bb1b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -790,6 +790,17 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent, + u8 *fw_return_code) +{ + if (!fw_return_code) + return; + + if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE || + p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP) + *fw_return_code = RDMA_RETURN_OK; +} + /* Avoid overriding of SPQ entries when getting out-of-order completions, by * marking the completions in a bitmap and increasing the chain consumer only * for the first successive completed entries. @@ -825,6 +836,17 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (p_hwfn->cdev->recov_in_prog) { + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n", + p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id); + + /* Let the flow complete w/o any error handling */ + qed_spq_recov_set_ret_code(p_ent, fw_return_code); + return 0; + } + /* Complete the entry */ rc = qed_spq_fill_entry(p_hwfn, p_ent); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index ca6290fa0f30..71e28be58102 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4447,6 +4447,13 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled) pci_disable_sriov(cdev->pdev); + if (cdev->recov_in_prog) { + DP_VERBOSE(cdev, + QED_MSG_IOV, + "Skip SRIOV disable operations in the device since a recovery is in progress\n"); + goto out; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *hwfn = &cdev->hwfns[i]; struct qed_ptt *ptt = qed_ptt_acquire(hwfn); @@ -4486,7 +4493,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) qed_ptt_release(hwfn, ptt); } - +out: qed_iov_set_vfs_to_disable(cdev, false); return 0; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5f818fda96bd..35170f74ed80 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -763,6 +763,7 @@ struct qed_probe_params { u32 dp_module; u8 dp_level; bool is_vf; + bool recov_in_prog; }; #define QED_DRV_VER_STR_SIZE 12 @@ -809,6 +810,7 @@ struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); + void (*schedule_recovery_handler)(void *dev); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); @@ -1056,6 +1058,24 @@ struct qed_common_ops { int (*db_recovery_del)(struct qed_dev *cdev, void __iomem *db_addr, void *db_data); +/** + * @brief recovery_process - Trigger a recovery process + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*recovery_process)(struct qed_dev *cdev); + +/** + * @brief recovery_prolog - Execute the prolog operations of a recovery process + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*recovery_prolog)(struct qed_dev *cdev); + /** * @brief update_drv_state - API to inform the change in the driver state. * -- cgit v1.2.3 From ccc67ef50b9085b895738d7720840eb6fe98745e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 28 Jan 2019 19:27:56 +0200 Subject: qede: Error recovery process This patch adds the error recovery process in the qede driver. The process includes a partial/customized driver unload and load, which allows it to look like a short suspend period to the kernel while preserving the net devices' state. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 3 + drivers/net/ethernet/qlogic/qede/qede_main.c | 292 ++++++++++++++++++++++----- drivers/net/ethernet/qlogic/qede/qede_rdma.c | 63 ++++-- include/linux/qed/qede_rdma.h | 10 +- 4 files changed, 294 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 613249d1e967..843416404aeb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -162,6 +162,7 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + bool exp_recovery; }; struct qede_ptp; @@ -264,6 +265,7 @@ struct qede_dev { enum QEDE_STATE { QEDE_STATE_CLOSED, QEDE_STATE_OPEN, + QEDE_STATE_RECOVERY, }; #define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) @@ -462,6 +464,7 @@ struct qede_fastpath { #define QEDE_CSUM_UNNECESSARY BIT(1) #define QEDE_TUNN_CSUM_UNNECESSARY BIT(2) +#define QEDE_SP_RECOVERY 0 #define QEDE_SP_RX_MODE 1 #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5a74fcbdbc2b..6b4d96635238 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id); static void qede_remove(struct pci_dev *pdev); static void qede_shutdown(struct pci_dev *pdev); static void qede_link_update(void *dev, struct qed_link_output *link); +static void qede_schedule_recovery_handler(void *dev); +static void qede_recovery_handler(struct qede_dev *edev); static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_generic_tlv_data(void *edev, struct qed_generic_tlvs *data); -/* The qede lock is used to protect driver state change and driver flows that - * are not reentrant. - */ -void __qede_lock(struct qede_dev *edev) -{ - mutex_lock(&edev->qede_lock); -} - -void __qede_unlock(struct qede_dev *edev) -{ - mutex_unlock(&edev->qede_lock); -} - #ifdef CONFIG_QED_SRIOV static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) @@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { .arfs_filter_op = qede_arfs_filter_op, #endif .link_update = qede_link_update, + .schedule_recovery_handler = qede_schedule_recovery_handler, .get_generic_tlv_data = qede_get_generic_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data, }, @@ -950,11 +940,57 @@ err: return -ENOMEM; } +/* The qede lock is used to protect driver state change and driver flows that + * are not reentrant. + */ +void __qede_lock(struct qede_dev *edev) +{ + mutex_lock(&edev->qede_lock); +} + +void __qede_unlock(struct qede_dev *edev) +{ + mutex_unlock(&edev->qede_lock); +} + +/* This version of the lock should be used when acquiring the RTNL lock is also + * needed in addition to the internal qede lock. + */ +void qede_lock(struct qede_dev *edev) +{ + rtnl_lock(); + __qede_lock(edev); +} + +void qede_unlock(struct qede_dev *edev) +{ + __qede_unlock(edev); + rtnl_unlock(); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* The locking scheme depends on the specific flag: + * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to + * ensure that ongoing flows are ended and new ones are not started. + * In other cases - only the internal qede lock should be acquired. + */ + + if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) { +#ifdef CONFIG_QED_SRIOV + /* SRIOV must be disabled outside the lock to avoid a deadlock. + * The recovery of the active VFs is currently not supported. + */ + qede_sriov_configure(edev->pdev, 0); +#endif + qede_lock(edev); + qede_recovery_handler(edev); + qede_unlock(edev); + } + __qede_lock(edev); if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags)) @@ -1031,6 +1067,7 @@ static void qede_log_probe(struct qede_dev *edev) enum qede_probe_mode { QEDE_PROBE_NORMAL, + QEDE_PROBE_RECOVERY, }; static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, @@ -1051,6 +1088,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, probe_params.dp_module = dp_module; probe_params.dp_level = dp_level; probe_params.is_vf = is_vf; + probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY); cdev = qed_ops->common->probe(pdev, &probe_params); if (!cdev) { rc = -ENODEV; @@ -1078,11 +1116,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, if (rc) goto err2; - edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, - dp_level); - if (!edev) { - rc = -ENOMEM; - goto err2; + if (mode != QEDE_PROBE_RECOVERY) { + edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, + dp_level); + if (!edev) { + rc = -ENOMEM; + goto err2; + } + } else { + struct net_device *ndev = pci_get_drvdata(pdev); + + edev = netdev_priv(ndev); + edev->cdev = cdev; + memset(&edev->stats, 0, sizeof(edev->stats)); + memcpy(&edev->dev_info, &dev_info, sizeof(dev_info)); } if (is_vf) @@ -1090,28 +1137,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_init_ndev(edev); - rc = qede_rdma_dev_add(edev); + rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY)); if (rc) goto err3; - /* Prepare the lock prior to the registration of the netdev, - * as once it's registered we might reach flows requiring it - * [it's even possible to reach a flow needing it directly - * from there, although it's unlikely]. - */ - INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); - mutex_init(&edev->qede_lock); - rc = register_netdev(edev->ndev); - if (rc) { - DP_NOTICE(edev, "Cannot register net-device\n"); - goto err4; + if (mode != QEDE_PROBE_RECOVERY) { + /* Prepare the lock prior to the registration of the netdev, + * as once it's registered we might reach flows requiring it + * [it's even possible to reach a flow needing it directly + * from there, although it's unlikely]. + */ + INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); + mutex_init(&edev->qede_lock); + + rc = register_netdev(edev->ndev); + if (rc) { + DP_NOTICE(edev, "Cannot register net-device\n"); + goto err4; + } } edev->ops->common->set_name(cdev, edev->ndev->name); /* PTP not supported on VFs */ if (!is_vf) - qede_ptp_enable(edev, true); + qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL)); edev->ops->register_ops(cdev, &qede_ll_ops, edev); @@ -1126,7 +1176,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; err4: - qede_rdma_dev_remove(edev); + qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY)); err3: free_netdev(edev->ndev); err2: @@ -1162,6 +1212,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id) enum qede_remove_mode { QEDE_REMOVE_NORMAL, + QEDE_REMOVE_RECOVERY, }; static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) @@ -1172,15 +1223,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); - qede_rdma_dev_remove(edev); - unregister_netdev(ndev); - cancel_delayed_work_sync(&edev->sp_task); + qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); - qede_ptp_disable(edev); + if (mode != QEDE_REMOVE_RECOVERY) { + unregister_netdev(ndev); - edev->ops->common->set_power_state(cdev, PCI_D0); + cancel_delayed_work_sync(&edev->sp_task); - pci_set_drvdata(pdev, NULL); + edev->ops->common->set_power_state(cdev, PCI_D0); + + pci_set_drvdata(pdev, NULL); + } + + qede_ptp_disable(edev); /* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); @@ -1194,7 +1249,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) * [e.g., QED register callbacks] won't break anything when * accessing the netdevice. */ - free_netdev(ndev); + if (mode != QEDE_REMOVE_RECOVERY) + free_netdev(ndev); dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } @@ -1539,6 +1595,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev) return 0; } +static void qede_empty_tx_queue(struct qede_dev *edev, + struct qede_tx_queue *txq) +{ + unsigned int pkts_compl = 0, bytes_compl = 0; + struct netdev_queue *netdev_txq; + int rc, len = 0; + + netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id); + + while (qed_chain_get_cons_idx(&txq->tx_pbl) != + qed_chain_get_prod_idx(&txq->tx_pbl)) { + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + + rc = qede_free_tx_pkt(edev, txq, &len); + if (rc) { + DP_NOTICE(edev, + "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, + qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + break; + } + + bytes_compl += len; + pkts_compl++; + txq->sw_tx_cons++; + } + + netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl); +} + +static void qede_empty_tx_queues(struct qede_dev *edev) +{ + int i; + + for_each_queue(i) + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + int cos; + + for_each_cos_in_txq(edev, cos) { + struct qede_fastpath *fp; + + fp = &edev->fp_array[i]; + qede_empty_tx_queue(edev, + &fp->txq[cos]); + } + } +} + /* This function inits fp content and resets the SB, RXQ and TXQ structures */ static void qede_init_fp(struct qede_dev *edev) { @@ -2053,6 +2161,7 @@ out: enum qede_unload_mode { QEDE_UNLOAD_NORMAL, + QEDE_UNLOAD_RECOVERY, }; static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, @@ -2068,7 +2177,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags); - edev->state = QEDE_STATE_CLOSED; + if (mode != QEDE_UNLOAD_RECOVERY) + edev->state = QEDE_STATE_CLOSED; qede_rdma_dev_event_close(edev); @@ -2076,17 +2186,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev); - /* Reset the link */ - memset(&link_params, 0, sizeof(link_params)); - link_params.link_up = false; - edev->ops->common->set_link(edev->cdev, &link_params); - rc = qede_stop_queues(edev); - if (rc) { - qede_sync_free_irqs(edev); - goto out; - } + if (mode != QEDE_UNLOAD_RECOVERY) { + /* Reset the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params); - DP_INFO(edev, "Stopped Queues\n"); + rc = qede_stop_queues(edev); + if (rc) { + qede_sync_free_irqs(edev); + goto out; + } + + DP_INFO(edev, "Stopped Queues\n"); + } qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); @@ -2102,18 +2215,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_napi_disable_remove(edev); + if (mode == QEDE_UNLOAD_RECOVERY) + qede_empty_tx_queues(edev); + qede_free_mem_load(edev); qede_free_fp_array(edev); out: if (!is_locked) __qede_unlock(edev); + + if (mode != QEDE_UNLOAD_RECOVERY) + DP_NOTICE(edev, "Link is down\n"); + DP_INFO(edev, "Ending qede unload\n"); } enum qede_load_mode { QEDE_LOAD_NORMAL, QEDE_LOAD_RELOAD, + QEDE_LOAD_RECOVERY, }; static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, @@ -2293,6 +2414,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link) } } +static void qede_schedule_recovery_handler(void *dev) +{ + struct qede_dev *edev = dev; + + if (edev->state == QEDE_STATE_RECOVERY) { + DP_NOTICE(edev, + "Avoid scheduling a recovery handling since already in recovery state\n"); + return; + } + + set_bit(QEDE_SP_RECOVERY, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); + + DP_INFO(edev, "Scheduled a recovery handler\n"); +} + +static void qede_recovery_failed(struct qede_dev *edev) +{ + netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n"); + + netif_device_detach(edev->ndev); + + if (edev->cdev) + edev->ops->common->set_power_state(edev->cdev, PCI_D3hot); +} + +static void qede_recovery_handler(struct qede_dev *edev) +{ + u32 curr_state = edev->state; + int rc; + + DP_NOTICE(edev, "Starting a recovery process\n"); + + /* No need to acquire first the qede_lock since is done by qede_sp_task + * before calling this function. + */ + edev->state = QEDE_STATE_RECOVERY; + + edev->ops->common->recovery_prolog(edev->cdev); + + if (curr_state == QEDE_STATE_OPEN) + qede_unload(edev, QEDE_UNLOAD_RECOVERY, true); + + __qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY); + + rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level, + IS_VF(edev), QEDE_PROBE_RECOVERY); + if (rc) { + edev->cdev = NULL; + goto err; + } + + if (curr_state == QEDE_STATE_OPEN) { + rc = qede_load(edev, QEDE_LOAD_RECOVERY, true); + if (rc) + goto err; + + qede_config_rx_mode(edev->ndev); + udp_tunnel_get_rx_info(edev->ndev); + } + + edev->state = curr_state; + + DP_NOTICE(edev, "Recovery handling is done\n"); + + return; + +err: + qede_recovery_failed(edev); +} + static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1..ffabc2d2f082 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) if (!qedr_drv) return; + /* Leftovers from previous error recovery */ + edev->rdma_info.exp_recovery = false; edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, edev->ndev); } @@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) destroy_workqueue(edev->rdma_info.rdma_wq); } -int qede_rdma_dev_add(struct qede_dev *edev) +int qede_rdma_dev_add(struct qede_dev *edev, bool recovery) { - int rc = 0; + int rc; - if (qede_rdma_supported(edev)) { - rc = qede_rdma_create_wq(edev); - if (rc) - return rc; + if (!qede_rdma_supported(edev)) + return 0; - INIT_LIST_HEAD(&edev->rdma_info.entry); - mutex_lock(&qedr_dev_list_lock); - list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); - _qede_rdma_dev_add(edev); - mutex_unlock(&qedr_dev_list_lock); - } + /* Cannot start qedr while recovering since it wasn't fully stopped */ + if (recovery) + return 0; + + rc = qede_rdma_create_wq(edev); + if (rc) + return rc; + + INIT_LIST_HEAD(&edev->rdma_info.entry); + mutex_lock(&qedr_dev_list_lock); + list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); + _qede_rdma_dev_add(edev); + mutex_unlock(&qedr_dev_list_lock); return rc; } @@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev) { if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) qedr_drv->remove(edev->rdma_info.qedr_dev); - edev->rdma_info.qedr_dev = NULL; } -void qede_rdma_dev_remove(struct qede_dev *edev) +void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery) { if (!qede_rdma_supported(edev)) return; - qede_rdma_destroy_wq(edev); - mutex_lock(&qedr_dev_list_lock); - _qede_rdma_dev_remove(edev); - list_del(&edev->rdma_info.entry); - mutex_unlock(&qedr_dev_list_lock); + /* Cannot remove qedr while recovering since it wasn't fully stopped */ + if (!recovery) { + qede_rdma_destroy_wq(edev); + mutex_lock(&qedr_dev_list_lock); + if (!edev->rdma_info.exp_recovery) + _qede_rdma_dev_remove(edev); + edev->rdma_info.qedr_dev = NULL; + list_del(&edev->rdma_info.entry); + mutex_unlock(&qedr_dev_list_lock); + } else { + if (!edev->rdma_info.exp_recovery) { + mutex_lock(&qedr_dev_list_lock); + _qede_rdma_dev_remove(edev); + mutex_unlock(&qedr_dev_list_lock); + } + edev->rdma_info.exp_recovery = true; + } } static void _qede_rdma_dev_open(struct qede_dev *edev) @@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv) mutex_lock(&qedr_dev_list_lock); list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { - if (edev->rdma_info.qedr_dev) + /* If device has experienced recovery it was already removed */ + if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery) _qede_rdma_dev_remove(edev); } qedr_drv = NULL; @@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev, { struct qede_rdma_event_work *event_node; + /* If a recovery was experienced avoid adding the event */ + if (edev->rdma_info.exp_recovery) + return; + if (!edev->rdma_info.qedr_dev) return; diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 9904617a9730..5a00c7a473bf 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv); bool qede_rdma_supported(struct qede_dev *dev); #if IS_ENABLED(CONFIG_QED_RDMA) -int qede_rdma_dev_add(struct qede_dev *dev); +int qede_rdma_dev_add(struct qede_dev *dev, bool recovery); void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev); -void qede_rdma_dev_remove(struct qede_dev *dev); +void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery); void qede_rdma_event_changeaddr(struct qede_dev *edr); #else -static inline int qede_rdma_dev_add(struct qede_dev *dev) +static inline int qede_rdma_dev_add(struct qede_dev *dev, + bool recovery) { return 0; } static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {} -static inline void qede_rdma_dev_remove(struct qede_dev *dev) {} +static inline void qede_rdma_dev_remove(struct qede_dev *dev, + bool recovery) {} static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {} #endif #endif -- cgit v1.2.3 From c8aa703822bf811269975cf7251b5eaad4c38e9c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 08:53:53 -0800 Subject: net/flow_dissector: move bpf case into __skb_flow_bpf_dissect This way, we can reuse it for flow dissector in BPF_PROG_TEST_RUN. No functional changes. Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 5 +++ net/core/flow_dissector.c | 92 +++++++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..be762fc34ff3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1221,6 +1221,11 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif +struct bpf_flow_keys; +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9f2840510e63..bb1a54747d64 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -683,6 +683,46 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys) +{ + struct bpf_skb_data_end cb_saved; + struct bpf_skb_data_end *cb; + u32 result; + + /* Note that even though the const qualifier is discarded + * throughout the execution of the BPF program, all changes(the + * control block) are reverted after the BPF program returns. + * Therefore, __skb_flow_dissect does not alter the skb. + */ + + cb = (struct bpf_skb_data_end *)skb->cb; + + /* Save Control Block */ + memcpy(&cb_saved, cb, sizeof(cb_saved)); + memset(cb, 0, sizeof(*cb)); + + /* Pass parameters to the BPF program */ + memset(flow_keys, 0, sizeof(*flow_keys)); + cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->nhoff = skb_network_offset(skb); + flow_keys->thoff = flow_keys->nhoff; + + bpf_compute_data_pointers((struct sk_buff *)skb); + result = BPF_PROG_RUN(prog, skb); + + /* Restore state */ + memcpy(cb, &cb_saved, sizeof(cb_saved)); + + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->thoff = clamp_t(u16, flow_keys->thoff, + flow_keys->nhoff, skb->len); + + return result == BPF_OK; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -714,7 +754,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_vlan *key_vlan; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; - struct bpf_prog *attached = NULL; int num_hdrs = 0; u8 ip_proto = 0; bool ret; @@ -754,53 +793,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); - rcu_read_lock(); if (skb) { + struct bpf_flow_keys flow_keys; + struct bpf_prog *attached = NULL; + + rcu_read_lock(); + if (skb->dev) attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); else if (skb->sk) attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); else WARN_ON_ONCE(1); - } - if (attached) { - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - struct bpf_flow_keys flow_keys = {}; - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; - u32 result; - - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(cb_saved)); - /* Pass parameters to the BPF program */ - cb->qdisc_cb.flow_keys = &flow_keys; - flow_keys.nhoff = nhoff; - flow_keys.thoff = nhoff; - - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(attached, skb); - - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); - - flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); - flow_keys.thoff = clamp_t(u16, flow_keys.thoff, - flow_keys.nhoff, skb->len); - - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); + if (attached) { + ret = __skb_flow_bpf_dissect(attached, skb, + flow_dissector, + &flow_keys); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return ret; + } rcu_read_unlock(); - return result == BPF_OK; } - rcu_read_unlock(); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { -- cgit v1.2.3 From b7a1848e8398b8396c990279e6a10272d818577e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 08:53:54 -0800 Subject: bpf: add BPF_PROG_TEST_RUN support for flow dissector The input is packet data, the output is struct bpf_flow_key. This should make it easy to test flow dissector programs without elaborate setup. Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 3 ++ net/bpf/test_run.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 1 + 3 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3851529062ec..0394f1f9213b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -404,6 +404,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); /* an array of programs to be executed under rcu_lock. * diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fa2644d276ef..2c5172b33209 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -240,3 +240,85 @@ out: kfree(data); return ret; } + +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct bpf_flow_keys flow_keys; + u64 time_start, time_spent = 0; + struct bpf_skb_data_end *cb; + u32 retval, duration; + struct sk_buff *skb; + struct sock *sk; + void *data; + int ret; + u32 i; + + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) + return -EINVAL; + + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + sk = kzalloc(sizeof(*sk), GFP_USER); + if (!sk) { + kfree(data); + return -ENOMEM; + } + sock_net_set(sk, current->nsproxy->net_ns); + sock_init_data(NULL, sk); + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + kfree(sk); + return -ENOMEM; + } + skb->sk = sk; + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, + current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + cb = (struct bpf_skb_data_end *)skb->cb; + cb->qdisc_cb.flow_keys = &flow_keys; + + if (!repeat) + repeat = 1; + + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + preempt_disable(); + rcu_read_lock(); + retval = __skb_flow_bpf_dissect(prog, skb, + &flow_keys_dissector, + &flow_keys); + rcu_read_unlock(); + preempt_enable(); + + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), + retval, duration); + + kfree_skb(skb); + kfree(sk); + return ret; +} diff --git a/net/core/filter.c b/net/core/filter.c index 8e587dd1da20..8ce421796ac6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7711,6 +7711,7 @@ const struct bpf_verifier_ops flow_dissector_verifier_ops = { }; const struct bpf_prog_ops flow_dissector_prog_ops = { + .test_run = bpf_prog_test_run_flow_dissector, }; int sk_detach_filter(struct sock *sk) -- cgit v1.2.3 From 2b6e492467c78183bb629bb0a100ea3509b615a5 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 23 Jan 2019 17:44:16 +0300 Subject: device property: Fix the length used in PROPERTY_ENTRY_STRING() With string type property entries we need to use sizeof(const char *) instead of the number of characters as the length of the entry. If the string was shorter then sizeof(const char *), attempts to read it would have failed with -EOVERFLOW. The problem has been hidden because all build-in string properties have had a string longer then 8 characters until now. Fixes: a85f42047533 ("device property: helper macros for property entry creation") Cc: 4.5+ # 4.5+ Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 3789ec755fb6..65d3420dd5d1 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -258,7 +258,7 @@ struct property_entry { #define PROPERTY_ENTRY_STRING(_name_, _val_) \ (struct property_entry) { \ .name = _name_, \ - .length = sizeof(_val_), \ + .length = sizeof(const char *), \ .type = DEV_PROP_STRING, \ { .value = { .str = _val_ } }, \ } -- cgit v1.2.3 From 625c85a62cb7d3c79f6e16de3cfa972033658250 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 Jan 2019 12:53:07 +0530 Subject: cpufreq: Use struct kobj_attribute instead of struct global_attr The cpufreq_global_kobject is created using kobject_create_and_add() helper, which assigns the kobj_type as dynamic_kobj_ktype and show/store routines are set to kobj_attr_show() and kobj_attr_store(). These routines pass struct kobj_attribute as an argument to the show/store callbacks. But all the cpufreq files created using the cpufreq_global_kobject expect the argument to be of type struct attribute. Things work fine currently as no one accesses the "attr" argument. We may not see issues even if the argument is used, as struct kobj_attribute has struct attribute as its first element and so they will both get same address. But this is logically incorrect and we should rather use struct kobj_attribute instead of struct global_attr in the cpufreq core and drivers and the show/store callbacks should take struct kobj_attribute as argument instead. This bug is caught using CFI CLANG builds in android kernel which catches mismatch in function prototypes for such callbacks. Reported-by: Donghee Han Reported-by: Sangkyu Kim Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- drivers/cpufreq/intel_pstate.c | 23 ++++++++++++----------- include/linux/cpufreq.h | 12 ++---------- 3 files changed, 17 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a8fa684f5f90..3eff158d9750 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -545,13 +545,13 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); * SYSFS INTERFACE * *********************************************************************/ static ssize_t show_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); } -static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) { int ret, enable; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dd66decf2087..5ab6a4fe93aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -895,7 +895,7 @@ static void intel_pstate_update_policies(void) /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ - (struct kobject *kobj, struct attribute *attr, char *buf) \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", global.object); \ } @@ -904,7 +904,7 @@ static ssize_t intel_pstate_show_status(char *buf); static int intel_pstate_update_status(const char *buf, size_t size); static ssize_t show_status(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -915,7 +915,7 @@ static ssize_t show_status(struct kobject *kobj, return ret; } -static ssize_t store_status(struct kobject *a, struct attribute *b, +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { char *p = memchr(buf, '\n', count); @@ -929,7 +929,7 @@ static ssize_t store_status(struct kobject *a, struct attribute *b, } static ssize_t show_turbo_pct(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total, no_turbo, turbo_pct; @@ -955,7 +955,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, } static ssize_t show_num_pstates(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total; @@ -976,7 +976,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, } static ssize_t show_no_turbo(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -998,7 +998,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return ret; } -static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1045,7 +1045,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1075,7 +1075,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1107,12 +1107,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, } static ssize_t show_hwp_dynamic_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", hwp_boost); } -static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b, +static ssize_t store_hwp_dynamic_boost(struct kobject *a, + struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bd7fbd6a4478..c19142911554 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -254,20 +254,12 @@ __ATTR(_name, 0644, show_##_name, store_##_name) static struct freq_attr _name = \ __ATTR(_name, 0200, NULL, store_##_name) -struct global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); -}; - #define define_one_global_ro(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) #define define_one_global_rw(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) -- cgit v1.2.3 From bc3843d4d357061d92e7800c7da342e2d068772c Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Fri, 25 Jan 2019 13:16:52 +0530 Subject: firmware: xilinx: Add reset API's This Patch Adds reset API's to support release, assert and status functionalities by using firmware interface. Signed-off-by: Nava kishore Manne Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 40 +++++++++++ include/linux/firmware/xlnx-zynqmp.h | 136 +++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 9a1c72a9280f..70b50377ae5f 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -469,6 +469,44 @@ static int zynqmp_pm_ioctl(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, arg1, arg2, out); } +/** + * zynqmp_pm_reset_assert - Request setting of reset (1 - assert, 0 - release) + * @reset: Reset to be configured + * @assert_flag: Flag stating should reset be asserted (1) or + * released (0) + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, + const enum zynqmp_pm_reset_action assert_flag) +{ + return zynqmp_pm_invoke_fn(PM_RESET_ASSERT, reset, assert_flag, + 0, 0, NULL); +} + +/** + * zynqmp_pm_reset_get_status - Get status of the reset + * @reset: Reset whose status should be returned + * @status: Returned status + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, + u32 *status) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!status) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_RESET_GET_STATUS, reset, 0, + 0, 0, ret_payload); + *status = ret_payload[1]; + + return ret; +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .query_data = zynqmp_pm_query_data, @@ -482,6 +520,8 @@ static const struct zynqmp_eemi_ops eemi_ops = { .clock_setparent = zynqmp_pm_clock_setparent, .clock_getparent = zynqmp_pm_clock_getparent, .ioctl = zynqmp_pm_ioctl, + .reset_assert = zynqmp_pm_reset_assert, + .reset_get_status = zynqmp_pm_reset_get_status, }; /** diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 3c3c28eff56a..07c587a0b06e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,6 +34,8 @@ enum pm_api_id { PM_GET_API_VERSION = 1, + PM_RESET_ASSERT = 17, + PM_RESET_GET_STATUS, PM_IOCTL = 34, PM_QUERY_DATA, PM_CLOCK_ENABLE, @@ -75,6 +77,137 @@ enum pm_query_id { PM_QID_CLOCK_GET_NUM_CLOCKS = 12, }; +enum zynqmp_pm_reset_action { + PM_RESET_ACTION_RELEASE, + PM_RESET_ACTION_ASSERT, + PM_RESET_ACTION_PULSE, +}; + +enum zynqmp_pm_reset { + ZYNQMP_PM_RESET_START = 1000, + ZYNQMP_PM_RESET_PCIE_CFG = ZYNQMP_PM_RESET_START, + ZYNQMP_PM_RESET_PCIE_BRIDGE, + ZYNQMP_PM_RESET_PCIE_CTRL, + ZYNQMP_PM_RESET_DP, + ZYNQMP_PM_RESET_SWDT_CRF, + ZYNQMP_PM_RESET_AFI_FM5, + ZYNQMP_PM_RESET_AFI_FM4, + ZYNQMP_PM_RESET_AFI_FM3, + ZYNQMP_PM_RESET_AFI_FM2, + ZYNQMP_PM_RESET_AFI_FM1, + ZYNQMP_PM_RESET_AFI_FM0, + ZYNQMP_PM_RESET_GDMA, + ZYNQMP_PM_RESET_GPU_PP1, + ZYNQMP_PM_RESET_GPU_PP0, + ZYNQMP_PM_RESET_GPU, + ZYNQMP_PM_RESET_GT, + ZYNQMP_PM_RESET_SATA, + ZYNQMP_PM_RESET_ACPU3_PWRON, + ZYNQMP_PM_RESET_ACPU2_PWRON, + ZYNQMP_PM_RESET_ACPU1_PWRON, + ZYNQMP_PM_RESET_ACPU0_PWRON, + ZYNQMP_PM_RESET_APU_L2, + ZYNQMP_PM_RESET_ACPU3, + ZYNQMP_PM_RESET_ACPU2, + ZYNQMP_PM_RESET_ACPU1, + ZYNQMP_PM_RESET_ACPU0, + ZYNQMP_PM_RESET_DDR, + ZYNQMP_PM_RESET_APM_FPD, + ZYNQMP_PM_RESET_SOFT, + ZYNQMP_PM_RESET_GEM0, + ZYNQMP_PM_RESET_GEM1, + ZYNQMP_PM_RESET_GEM2, + ZYNQMP_PM_RESET_GEM3, + ZYNQMP_PM_RESET_QSPI, + ZYNQMP_PM_RESET_UART0, + ZYNQMP_PM_RESET_UART1, + ZYNQMP_PM_RESET_SPI0, + ZYNQMP_PM_RESET_SPI1, + ZYNQMP_PM_RESET_SDIO0, + ZYNQMP_PM_RESET_SDIO1, + ZYNQMP_PM_RESET_CAN0, + ZYNQMP_PM_RESET_CAN1, + ZYNQMP_PM_RESET_I2C0, + ZYNQMP_PM_RESET_I2C1, + ZYNQMP_PM_RESET_TTC0, + ZYNQMP_PM_RESET_TTC1, + ZYNQMP_PM_RESET_TTC2, + ZYNQMP_PM_RESET_TTC3, + ZYNQMP_PM_RESET_SWDT_CRL, + ZYNQMP_PM_RESET_NAND, + ZYNQMP_PM_RESET_ADMA, + ZYNQMP_PM_RESET_GPIO, + ZYNQMP_PM_RESET_IOU_CC, + ZYNQMP_PM_RESET_TIMESTAMP, + ZYNQMP_PM_RESET_RPU_R50, + ZYNQMP_PM_RESET_RPU_R51, + ZYNQMP_PM_RESET_RPU_AMBA, + ZYNQMP_PM_RESET_OCM, + ZYNQMP_PM_RESET_RPU_PGE, + ZYNQMP_PM_RESET_USB0_CORERESET, + ZYNQMP_PM_RESET_USB1_CORERESET, + ZYNQMP_PM_RESET_USB0_HIBERRESET, + ZYNQMP_PM_RESET_USB1_HIBERRESET, + ZYNQMP_PM_RESET_USB0_APB, + ZYNQMP_PM_RESET_USB1_APB, + ZYNQMP_PM_RESET_IPI, + ZYNQMP_PM_RESET_APM_LPD, + ZYNQMP_PM_RESET_RTC, + ZYNQMP_PM_RESET_SYSMON, + ZYNQMP_PM_RESET_AFI_FM6, + ZYNQMP_PM_RESET_LPD_SWDT, + ZYNQMP_PM_RESET_FPD, + ZYNQMP_PM_RESET_RPU_DBG1, + ZYNQMP_PM_RESET_RPU_DBG0, + ZYNQMP_PM_RESET_DBG_LPD, + ZYNQMP_PM_RESET_DBG_FPD, + ZYNQMP_PM_RESET_APLL, + ZYNQMP_PM_RESET_DPLL, + ZYNQMP_PM_RESET_VPLL, + ZYNQMP_PM_RESET_IOPLL, + ZYNQMP_PM_RESET_RPLL, + ZYNQMP_PM_RESET_GPO3_PL_0, + ZYNQMP_PM_RESET_GPO3_PL_1, + ZYNQMP_PM_RESET_GPO3_PL_2, + ZYNQMP_PM_RESET_GPO3_PL_3, + ZYNQMP_PM_RESET_GPO3_PL_4, + ZYNQMP_PM_RESET_GPO3_PL_5, + ZYNQMP_PM_RESET_GPO3_PL_6, + ZYNQMP_PM_RESET_GPO3_PL_7, + ZYNQMP_PM_RESET_GPO3_PL_8, + ZYNQMP_PM_RESET_GPO3_PL_9, + ZYNQMP_PM_RESET_GPO3_PL_10, + ZYNQMP_PM_RESET_GPO3_PL_11, + ZYNQMP_PM_RESET_GPO3_PL_12, + ZYNQMP_PM_RESET_GPO3_PL_13, + ZYNQMP_PM_RESET_GPO3_PL_14, + ZYNQMP_PM_RESET_GPO3_PL_15, + ZYNQMP_PM_RESET_GPO3_PL_16, + ZYNQMP_PM_RESET_GPO3_PL_17, + ZYNQMP_PM_RESET_GPO3_PL_18, + ZYNQMP_PM_RESET_GPO3_PL_19, + ZYNQMP_PM_RESET_GPO3_PL_20, + ZYNQMP_PM_RESET_GPO3_PL_21, + ZYNQMP_PM_RESET_GPO3_PL_22, + ZYNQMP_PM_RESET_GPO3_PL_23, + ZYNQMP_PM_RESET_GPO3_PL_24, + ZYNQMP_PM_RESET_GPO3_PL_25, + ZYNQMP_PM_RESET_GPO3_PL_26, + ZYNQMP_PM_RESET_GPO3_PL_27, + ZYNQMP_PM_RESET_GPO3_PL_28, + ZYNQMP_PM_RESET_GPO3_PL_29, + ZYNQMP_PM_RESET_GPO3_PL_30, + ZYNQMP_PM_RESET_GPO3_PL_31, + ZYNQMP_PM_RESET_RPU_LS, + ZYNQMP_PM_RESET_PS_ONLY, + ZYNQMP_PM_RESET_PL, + ZYNQMP_PM_RESET_PS_PL0, + ZYNQMP_PM_RESET_PS_PL1, + ZYNQMP_PM_RESET_PS_PL2, + ZYNQMP_PM_RESET_PS_PL3, + ZYNQMP_PM_RESET_END = ZYNQMP_PM_RESET_PS_PL3 +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID @@ -102,6 +235,9 @@ struct zynqmp_eemi_ops { int (*clock_setparent)(u32 clock_id, u32 parent_id); int (*clock_getparent)(u32 clock_id, u32 *parent_id); int (*ioctl)(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, u32 *out); + int (*reset_assert)(const enum zynqmp_pm_reset reset, + const enum zynqmp_pm_reset_action assert_flag); + int (*reset_get_status)(const enum zynqmp_pm_reset reset, u32 *status); }; #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) -- cgit v1.2.3 From 15917dc02841862840efcbfe1da0830f88078b5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Dec 2018 13:04:41 +0100 Subject: sched: Remove stale PF_MUTEX_TESTER bit The RTMUTEX tester was removed long ago but the PF bit stayed around. Remove it and free up the space. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d2f90fa92468..e2bba022827d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1409,7 +1409,6 @@ extern struct pid *cad_pid; #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -- cgit v1.2.3 From 71368af9027f18fe5d1c6f372cfdff7e4bde8b48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 16 Jan 2019 17:01:36 -0500 Subject: x86/speculation: Add PR_SPEC_DISABLE_NOEXEC With the default SPEC_STORE_BYPASS_SECCOMP/SPEC_STORE_BYPASS_PRCTL mode, the TIF_SSBD bit will be inherited when a new task is fork'ed or cloned. It will also remain when a new program is execve'ed. Only certain class of applications (like Java) that can run on behalf of multiple users on a single thread will require disabling speculative store bypass for security purposes. Those applications will call prctl(2) at startup time to disable SSB. They won't rely on the fact the SSB might have been disabled. Other applications that don't need SSBD will just move on without checking if SSBD has been turned on or not. The fact that the TIF_SSBD is inherited across execve(2) boundary will cause performance of applications that don't need SSBD but their predecessors have SSBD on to be unwittingly impacted especially if they write to memory a lot. To remedy this problem, a new PR_SPEC_DISABLE_NOEXEC argument for the PR_SET_SPECULATION_CTRL option of prctl(2) is added to allow applications to specify that the SSBD feature bit on the task structure should be cleared whenever a new program is being execve'ed. Suggested-by: Thomas Gleixner Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: David Woodhouse Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Tim Chen Cc: KarimAllah Ahmed Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Link: https://lkml.kernel.org/r/1547676096-3281-1-git-send-email-longman@redhat.com --- Documentation/userspace-api/spec_ctrl.rst | 27 +++++++++++++++------------ arch/x86/kernel/cpu/bugs.c | 12 ++++++++++++ arch/x86/kernel/process.c | 12 ++++++++++++ include/linux/sched.h | 5 +++++ include/uapi/linux/prctl.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 6 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index c4dbe6f7cdae..1129c7550a48 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -28,18 +28,20 @@ PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bits 0-3 with the following meaning: -==== ===================== =================================================== -Bit Define Description -==== ===================== =================================================== -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL. -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled. -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled. -3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A - subsequent prctl(..., PR_SPEC_ENABLE) will fail. -==== ===================== =================================================== +==== ====================== ================================================== +Bit Define Description +==== ====================== ================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL. +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled. +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled. +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +4 PR_SPEC_DISABLE_NOEXEC Same as PR_SPEC_DISABLE, but the state will be + cleared on :manpage:`execve(2)`. +==== ====================== ================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. @@ -92,6 +94,7 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE_NOEXEC, 0, 0); - PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes (Mitigate Spectre V2 style attacks against user processes) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de0f4170178..2faeaf46347a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -798,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); + task_clear_spec_ssb_noexec(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE_NOEXEC: + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_noexec(task); task_update_spec_tif(task); break; default: @@ -885,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_noexec(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC; if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90ae0ca51083..58ac7be52c7a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -255,6 +255,18 @@ void arch_setup_new_exec(void) /* If cpuid was previously disabled for this task, re-enable it. */ if (test_thread_flag(TIF_NOCPUID)) enable_cpuid(); + + /* + * Don't inherit TIF_SSBD across exec boundary when + * PR_SPEC_DISABLE_NOEXEC is used. + */ + if (test_thread_flag(TIF_SSBD) && + task_spec_ssb_noexec(current)) { + clear_thread_flag(TIF_SSBD); + task_clear_spec_ssb_disable(current); + task_clear_spec_ssb_noexec(current); + speculation_ctrl_update(task_thread_info(current)->flags); + } } static inline void switch_to_bitmap(struct thread_struct *prev, diff --git a/include/linux/sched.h b/include/linux/sched.h index d2f90fa92468..fc836dc71bba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1459,6 +1459,7 @@ static inline bool is_percpu_thread(void) #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ +#define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1487,6 +1488,10 @@ TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) +TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) +TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) + TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index b4875a93363a..094bb03b9cc2 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -219,6 +219,7 @@ struct prctl_mm_map { # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) /* Reset arm64 pointer authentication keys */ #define PR_PAC_RESET_KEYS 54 diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index b4875a93363a..094bb03b9cc2 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -219,6 +219,7 @@ struct prctl_mm_map { # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) # define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) /* Reset arm64 pointer authentication keys */ #define PR_PAC_RESET_KEYS 54 -- cgit v1.2.3 From fab940755d1d78377901450b6ee7c77356e06821 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 27 Jan 2019 14:03:57 +0100 Subject: x86/hw_breakpoints, kprobes: Remove kprobes ifdeffery Remove the ifdeffery in the breakpoint parsing arch_build_bp_info() by adding a within_kprobe_blacklist() stub for the !CONFIG_KPROBES case. It is returning true when kprobes are not enabled to mean that any address is within the kprobes blacklist on such kernels and thus not allow kernel breakpoints on non-kprobes kernels. Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Frederic Weisbecker Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Naveen N. Rao" Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190127131237.4557-1-bp@alien8.de --- arch/x86/kernel/hw_breakpoint.c | 4 ---- include/linux/kprobes.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 7d6f91f2869a..ff9bfd40429e 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp, * allow kernel breakpoints at all. */ if (attr->bp_addr >= TASK_SIZE_MAX) { -#ifdef CONFIG_KPROBES if (within_kprobe_blacklist(attr->bp_addr)) return -EINVAL; -#else - return -EINVAL; -#endif } hw->type = X86_BREAKPOINT_EXECUTE; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e07e91daaacc..201f0f2683f2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -442,6 +442,11 @@ static inline int enable_kprobe(struct kprobe *kp) { return -ENOSYS; } + +static inline bool within_kprobe_blacklist(unsigned long addr) +{ + return true; +} #endif /* CONFIG_KPROBES */ static inline int disable_kretprobe(struct kretprobe *rp) { -- cgit v1.2.3 From 5c238a8b599f1ae25eaeb08ad0e9e13e2b9eb023 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Wed, 30 Jan 2019 10:52:01 +0530 Subject: cpufreq: Auto-register the driver as a thermal cooling device if asked All cpufreq drivers do similar things to register as a cooling device. Provide a cpufreq driver flag so drivers can just ask the cpufreq core to register the cooling device on their behalf. This allows us to get rid of duplicated code in the drivers. In order to allow this, we add a struct thermal_cooling_device pointer to struct cpufreq_policy so that drivers don't need to store it in a private data structure. Suggested-by: Stephen Boyd Suggested-by: Viresh Kumar Signed-off-by: Amit Kucheria Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Acked-by: Viresh Kumar Reviewed-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 11 +++++++++++ include/linux/cpufreq.h | 9 +++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3eff158d9750..96a69c67a545 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -1316,6 +1317,10 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); + if (IS_ENABLED(CONFIG_CPU_THERMAL) && + cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) + policy->cdev = of_cpufreq_cooling_register(policy); + pr_debug("initialization complete\n"); return 0; @@ -1403,6 +1408,12 @@ static int cpufreq_offline(unsigned int cpu) goto unlock; } + if (IS_ENABLED(CONFIG_CPU_THERMAL) && + cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + if (cpufreq_driver->stop_cpu) cpufreq_driver->stop_cpu(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c19142911554..9db074ecbbd7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,9 @@ struct cpufreq_policy { /* For cpufreq driver's internal use */ void *driver_data; + + /* Pointer to the cooling device if used for thermal mitigation */ + struct thermal_cooling_device *cdev; }; /* Only for ACPI */ @@ -378,6 +381,12 @@ struct cpufreq_driver { */ #define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6) +/* + * Set by drivers that want the core to automatically register the cpufreq + * driver as a thermal cooling device. + */ +#define CPUFREQ_IS_COOLING_DEV BIT(7) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3 From 9bc61ab18b1d41f26dc06b9e6d3c203e65f83fe6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Nov 2018 03:19:03 -0500 Subject: vfs: Introduce fs_context, switch vfs_kern_mount() to it. Introduce a filesystem context concept to be used during superblock creation for mount and superblock reconfiguration for remount. This is allocated at the beginning of the mount procedure and into it is placed: (1) Filesystem type. (2) Namespaces. (3) Source/Device names (there may be multiple). (4) Superblock flags (SB_*). (5) Security details. (6) Filesystem-specific data, as set by the mount options. Accessor functions are then provided to set up a context, parameterise it from monolithic mount data (the data page passed to mount(2)) and tear it down again. A legacy wrapper is provided that implements what will be the basic operations, wrapping access to filesystems that aren't yet aware of the fs_context. Finally, vfs_kern_mount() is changed to make use of the fs_context and mount_fs() is replaced by vfs_get_tree(), called from vfs_kern_mount(). [AV -- add missing kstrdup()] [AV -- put_cred() can be unconditional - fc->cred can't be NULL] [AV -- take legacy_validate() contents into legacy_parse_monolithic()] [AV -- merge KERNEL_MOUNT and USER_MOUNT] [AV -- don't unlock superblock on success return from vfs_get_tree()] [AV -- kill 'reference' argument of init_fs_context()] Signed-off-by: David Howells Co-developed-by: Al Viro Signed-off-by: Al Viro --- fs/Makefile | 3 +- fs/fs_context.c | 182 +++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 9 ++- fs/namespace.c | 46 ++++++++---- fs/super.c | 50 ++++++------- include/linux/fs_context.h | 64 ++++++++++++++++ 6 files changed, 310 insertions(+), 44 deletions(-) create mode 100644 fs/fs_context.c create mode 100644 include/linux/fs_context.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 293733f61594..5563cf34f7c2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ - stack.o fs_struct.o statfs.o fs_pin.o nsfs.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ + fs_context.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_context.c b/fs/fs_context.c new file mode 100644 index 000000000000..4294091b689d --- /dev/null +++ b/fs/fs_context.c @@ -0,0 +1,182 @@ +/* Provide a way to create a superblock configuration context within the kernel + * that allows a superblock to be set up prior to mounting. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mount.h" +#include "internal.h" + +struct legacy_fs_context { + char *legacy_data; /* Data page for legacy filesystems */ + size_t data_size; +}; + +static int legacy_init_fs_context(struct fs_context *fc); + +/** + * alloc_fs_context - Create a filesystem context. + * @fs_type: The filesystem type. + * @reference: The dentry from which this one derives (or NULL) + * @sb_flags: Filesystem/superblock flags (SB_*) + * @sb_flags_mask: Applicable members of @sb_flags + * @purpose: The purpose that this configuration shall be used for. + * + * Open a filesystem and create a mount context. The mount context is + * initialised with the supplied flags and, if a submount/automount from + * another superblock (referred to by @reference) is supplied, may have + * parameters such as namespaces copied across from that superblock. + */ +static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose) +{ + struct fs_context *fc; + int ret = -ENOMEM; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->purpose = purpose; + fc->sb_flags = sb_flags; + fc->sb_flags_mask = sb_flags_mask; + fc->fs_type = get_filesystem(fs_type); + fc->cred = get_current_cred(); + fc->net_ns = get_net(current->nsproxy->net_ns); + + switch (purpose) { + case FS_CONTEXT_FOR_MOUNT: + fc->user_ns = get_user_ns(fc->cred->user_ns); + break; + } + + ret = legacy_init_fs_context(fc); + if (ret < 0) + goto err_fc; + fc->need_free = true; + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} + +struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags) +{ + return alloc_fs_context(fs_type, NULL, sb_flags, 0, + FS_CONTEXT_FOR_MOUNT); +} +EXPORT_SYMBOL(fs_context_for_mount); + +static void legacy_fs_context_free(struct fs_context *fc); +/** + * put_fs_context - Dispose of a superblock configuration context. + * @fc: The context to dispose of. + */ +void put_fs_context(struct fs_context *fc) +{ + struct super_block *sb; + + if (fc->root) { + sb = fc->root->d_sb; + dput(fc->root); + fc->root = NULL; + deactivate_super(sb); + } + + if (fc->need_free) + legacy_fs_context_free(fc); + + security_free_mnt_opts(&fc->security); + if (fc->net_ns) + put_net(fc->net_ns); + put_user_ns(fc->user_ns); + put_cred(fc->cred); + kfree(fc->subtype); + put_filesystem(fc->fs_type); + kfree(fc->source); + kfree(fc); +} +EXPORT_SYMBOL(put_fs_context); + +/* + * Free the config for a filesystem that doesn't support fs_context. + */ +static void legacy_fs_context_free(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +/* + * Add monolithic mount data. + */ +static int legacy_parse_monolithic(struct fs_context *fc, void *data) +{ + struct legacy_fs_context *ctx = fc->fs_private; + ctx->legacy_data = data; + if (!ctx->legacy_data) + return 0; + if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) + return 0; + return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); +} + +/* + * Get a mountable root with the legacy mount command. + */ +int legacy_get_tree(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb; + struct dentry *root; + + root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, + fc->source, ctx->legacy_data); + if (IS_ERR(root)) + return PTR_ERR(root); + + sb = root->d_sb; + BUG_ON(!sb); + + fc->root = root; + return 0; +} + +/* + * Initialise a legacy context for a filesystem that doesn't support + * fs_context. + */ +static int legacy_init_fs_context(struct fs_context *fc) +{ + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + if (!fc->fs_private) + return -ENOMEM; + return 0; +} + +int parse_monolithic_mount_data(struct fs_context *fc, void *data) +{ + return legacy_parse_monolithic(fc, data); +} diff --git a/fs/internal.h b/fs/internal.h index d410186bc369..f85c3212d25d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -17,6 +17,7 @@ struct linux_binprm; struct path; struct mount; struct shrink_control; +struct fs_context; /* * block_dev.c @@ -51,6 +52,12 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, */ extern void __init chrdev_init(void); +/* + * fs_context.c + */ +extern int legacy_get_tree(struct fs_context *fc); +extern int parse_monolithic_mount_data(struct fs_context *, void *); + /* * namei.c */ @@ -101,8 +108,6 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int do_remount_sb(struct super_block *, int, void *, int); extern bool trylock_super(struct super_block *sb); -extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/namespace.c b/fs/namespace.c index f0b8a8ca08df..3f2fd7a34733 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "pnode.h" #include "internal.h" @@ -940,36 +941,53 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } -struct vfsmount * -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, + void *data) { + struct fs_context *fc; struct mount *mnt; - struct dentry *root; + int ret = 0; if (!type) return ERR_PTR(-ENODEV); + fc = fs_context_for_mount(type, flags); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + if (name) { + fc->source = kstrdup(name, GFP_KERNEL); + if (!fc->source) + ret = -ENOMEM; + } + if (!ret) + ret = parse_monolithic_mount_data(fc, data); + if (!ret) + ret = vfs_get_tree(fc); + if (ret) { + put_fs_context(fc); + return ERR_PTR(ret); + } + up_write(&fc->root->d_sb->s_umount); mnt = alloc_vfsmnt(name); - if (!mnt) + if (!mnt) { + put_fs_context(fc); return ERR_PTR(-ENOMEM); + } if (flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); - if (IS_ERR(root)) { - mnt_free_id(mnt); - free_vfsmnt(mnt); - return ERR_CAST(root); - } - - mnt->mnt.mnt_root = root; - mnt->mnt.mnt_sb = root->d_sb; + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt.mnt_sb = fc->root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); + list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts); unlock_mount_hash(); + put_fs_context(fc); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); diff --git a/fs/super.c b/fs/super.c index 48e25eba8465..fc3887277ad1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1241,27 +1242,24 @@ struct dentry *mount_single(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_single); -struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +/** + * vfs_get_tree - Get the mountable root + * @fc: The superblock configuration context. + * + * The filesystem is invoked to get or create a superblock which can then later + * be used for mounting. The filesystem places a pointer to the root to be + * used for mounting in @fc->root. + */ +int vfs_get_tree(struct fs_context *fc) { - struct dentry *root; struct super_block *sb; - int error = -ENOMEM; - void *sec_opts = NULL; + int error; - if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { - error = security_sb_eat_lsm_opts(data, &sec_opts); - if (error) - return ERR_PTR(error); - } + error = legacy_get_tree(fc); + if (error < 0) + return error; - root = type->mount(type, flags, name, data); - if (IS_ERR(root)) { - error = PTR_ERR(root); - goto out_free_secdata; - } - sb = root->d_sb; - BUG_ON(!sb); + sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); /* @@ -1273,11 +1271,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) smp_wmb(); sb->s_flags |= SB_BORN; - error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); + error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); if (error) goto out_sb; - if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { + if (!(fc->sb_flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { error = security_sb_kern_mount(sb); if (error) goto out_sb; @@ -1290,18 +1288,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " - "negative value (%lld)\n", type->name, sb->s_maxbytes); + "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); - up_write(&sb->s_umount); - security_free_mnt_opts(&sec_opts); - return root; + return 0; out_sb: - dput(root); + dput(fc->root); + fc->root = NULL; deactivate_locked_super(sb); -out_free_secdata: - security_free_mnt_opts(&sec_opts); - return ERR_PTR(error); + return error; } +EXPORT_SYMBOL(vfs_get_tree); /* * Setup private BDI for given superblock. It gets automatically cleaned up diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h new file mode 100644 index 000000000000..9805514444c9 --- /dev/null +++ b/include/linux/fs_context.h @@ -0,0 +1,64 @@ +/* Filesystem superblock creation and reconfiguration context. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_FS_CONTEXT_H +#define _LINUX_FS_CONTEXT_H + +#include +#include +#include + +struct cred; +struct dentry; +struct file_operations; +struct file_system_type; +struct net; +struct user_namespace; + +enum fs_context_purpose { + FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ +}; + +/* + * Filesystem context for holding the parameters used in the creation or + * reconfiguration of a superblock. + * + * Superblock creation fills in ->root whereas reconfiguration begins with this + * already set. + * + * See Documentation/filesystems/mounting.txt + */ +struct fs_context { + struct file_system_type *fs_type; + void *fs_private; /* The filesystem's context */ + struct dentry *root; /* The root and superblock */ + struct user_namespace *user_ns; /* The user namespace for this mount */ + struct net *net_ns; /* The network namespace for this mount */ + const struct cred *cred; /* The mounter's credentials */ + const char *source; /* The source name (eg. dev path) */ + const char *subtype; /* The subtype to set on the superblock */ + void *security; /* Linux S&M options */ + unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ + unsigned int sb_flags_mask; /* Superblock flags that were changed */ + enum fs_context_purpose purpose:8; + bool need_free:1; /* Need to call ops->free() */ +}; + +/* + * fs_context manipulation functions. + */ +extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags); + +extern int vfs_get_tree(struct fs_context *fc); +extern void put_fs_context(struct fs_context *fc); + +#endif /* _LINUX_FS_CONTEXT_H */ -- cgit v1.2.3 From 8f2918898eb5fe25845dde7f4a77bda0e2966e05 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Nov 2018 06:48:34 -0500 Subject: new helpers: vfs_create_mount(), fc_mount() Create a new helper, vfs_create_mount(), that creates a detached vfsmount object from an fs_context that has a superblock attached to it. Almost all uses will be paired with immediately preceding vfs_get_tree(); add a helper for such combination. Switch vfs_kern_mount() to use this. NOTE: mild behaviour change; passing NULL as 'device name' to something like procfs will change /proc/*/mountstats - "device none" instead on "no device". That is consistent with /proc/mounts et.al. [do'h - EXPORT_SYMBOL_GPL slipped in by mistake; removed] [AV -- remove confused comment from vfs_create_mount()] [AV -- removed the second argument] Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/namespace.c | 76 +++++++++++++++++++++++++++++++++++---------------- include/linux/mount.h | 3 ++ 2 files changed, 55 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 3f2fd7a34733..156771f5745a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -941,12 +941,59 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } +/** + * vfs_create_mount - Create a mount for a configured superblock + * @fc: The configuration context with the superblock attached + * + * Create a mount to an already configured superblock. If necessary, the + * caller should invoke vfs_get_tree() before calling this. + * + * Note that this does not attach the mount to anything. + */ +struct vfsmount *vfs_create_mount(struct fs_context *fc) +{ + struct mount *mnt; + + if (!fc->root) + return ERR_PTR(-EINVAL); + + mnt = alloc_vfsmnt(fc->source ?: "none"); + if (!mnt) + return ERR_PTR(-ENOMEM); + + if (fc->sb_flags & SB_KERNMOUNT) + mnt->mnt.mnt_flags = MNT_INTERNAL; + + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_sb = fc->root->d_sb; + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + + lock_mount_hash(); + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); + unlock_mount_hash(); + return &mnt->mnt; +} +EXPORT_SYMBOL(vfs_create_mount); + +struct vfsmount *fc_mount(struct fs_context *fc) +{ + int err = vfs_get_tree(fc); + if (!err) { + up_write(&fc->root->d_sb->s_umount); + return vfs_create_mount(fc); + } + return ERR_PTR(err); +} +EXPORT_SYMBOL(fc_mount); + struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct fs_context *fc; - struct mount *mnt; + struct vfsmount *mnt; int ret = 0; if (!type) @@ -964,31 +1011,12 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type, if (!ret) ret = parse_monolithic_mount_data(fc, data); if (!ret) - ret = vfs_get_tree(fc); - if (ret) { - put_fs_context(fc); - return ERR_PTR(ret); - } - up_write(&fc->root->d_sb->s_umount); - mnt = alloc_vfsmnt(name); - if (!mnt) { - put_fs_context(fc); - return ERR_PTR(-ENOMEM); - } - - if (flags & SB_KERNMOUNT) - mnt->mnt.mnt_flags = MNT_INTERNAL; + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); - atomic_inc(&fc->root->d_sb->s_active); - mnt->mnt.mnt_root = dget(fc->root); - mnt->mnt.mnt_sb = fc->root->d_sb; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts); - unlock_mount_hash(); put_fs_context(fc); - return &mnt->mnt; + return mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); diff --git a/include/linux/mount.h b/include/linux/mount.h index 037eed52164b..9197ddbf35fb 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -21,6 +21,7 @@ struct super_block; struct vfsmount; struct dentry; struct mnt_namespace; +struct fs_context; #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 @@ -88,6 +89,8 @@ struct path; extern struct vfsmount *clone_private_mount(const struct path *path); struct file_system_type; +extern struct vfsmount *fc_mount(struct fs_context *fc); +extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -- cgit v1.2.3 From a0c9a8b8fd9fd572b0d60276beb2142c8f59f9b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Nov 2018 07:18:51 -0500 Subject: teach vfs_get_tree() to handle subtype, switch do_new_mount() to it Roll the handling of subtypes into do_new_mount() and vfs_get_tree(). The former determines any subtype string and hangs it off the fs_context; the latter applies it. Make do_new_mount() create, parameterise and commit an fs_context and create a mount for itself rather than calling vfs_kern_mount(). [AV -- missing kstrdup()] [AV -- ... and no kstrdup() if we get to setting ->s_submount - we simply transfer it from fc, leaving NULL behind] [AV -- constify ->s_submount, while we are at it] Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/namespace.c | 77 ++++++++++++++++++++++++++++++++---------------------- fs/super.c | 5 ++++ include/linux/fs.h | 2 +- 3 files changed, 52 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 156771f5745a..0354cb6ac2d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2479,29 +2479,6 @@ out: return err; } -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) -{ - int err; - const char *subtype = strchr(fstype, '.'); - if (subtype) { - subtype++; - err = -EINVAL; - if (!subtype[0]) - goto err; - } else - subtype = ""; - - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); - err = -ENOMEM; - if (!mnt->mnt_sb->s_subtype) - goto err; - return mnt; - - err: - mntput(mnt); - return ERR_PTR(err); -} - /* * add a mount into a namespace's mount tree */ @@ -2557,7 +2534,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, { struct file_system_type *type; struct vfsmount *mnt; - int err; + struct fs_context *fc; + const char *subtype = NULL; + int err = 0; if (!fstype) return -EINVAL; @@ -2566,23 +2545,59 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, if (!type) return -ENODEV; - mnt = vfs_kern_mount(type, sb_flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); + if (type->fs_flags & FS_HAS_SUBTYPE) { + subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + if (!*subtype) { + put_filesystem(type); + return -EINVAL; + } + } else { + subtype = ""; + } + } + fc = fs_context_for_mount(type, sb_flags); put_filesystem(type); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (subtype) { + fc->subtype = kstrdup(subtype, GFP_KERNEL); + if (!fc->subtype) + err = -ENOMEM; + } + if (!err && name) { + fc->source = kstrdup(name, GFP_KERNEL); + if (!fc->source) + err = -ENOMEM; + } + if (!err) + err = parse_monolithic_mount_data(fc, data); + if (!err) + err = vfs_get_tree(fc); + if (err) + goto out; + + up_write(&fc->root->d_sb->s_umount); + mnt = vfs_create_mount(fc); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out; + } if (mount_too_revealing(mnt, &mnt_flags)) { mntput(mnt); - return -EPERM; + err = -EPERM; + goto out; } err = do_add_mount(real_mount(mnt), path, mnt_flags); if (err) mntput(mnt); +out: + put_fs_context(fc); return err; } diff --git a/fs/super.c b/fs/super.c index fc3887277ad1..b91b6df05b67 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1262,6 +1262,11 @@ int vfs_get_tree(struct fs_context *fc) sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); + if (fc->subtype && !sb->s_subtype) { + sb->s_subtype = fc->subtype; + fc->subtype = NULL; + } + /* * Write barrier is for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is the diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..36fff12ab890 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1447,7 +1447,7 @@ struct super_block { * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ - char *s_subtype; + const char *s_subtype; const struct dentry_operations *s_d_op; /* default d_op for dentries */ -- cgit v1.2.3 From 8d0347f6c3a9d4953ddd636a31c6584da082e084 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 4 Nov 2018 09:28:36 -0500 Subject: convert do_remount_sb() to fs_context Replace do_remount_sb() with a function, reconfigure_super(), that's fs_context aware. The fs_context is expected to be parameterised already and have ->root pointing to the superblock to be reconfigured. A legacy wrapper is provided that is intended to be called from the fs_context ops when those appear, but for now is called directly from reconfigure_super(). This wrapper invokes the ->remount_fs() superblock op for the moment. It is intended that the remount_fs() op will be phased out. The fs_context->purpose is set to FS_CONTEXT_FOR_RECONFIGURE to indicate that the context is being used for reconfiguration. do_umount_root() is provided to consolidate remount-to-R/O for umount and emergency remount by creating a context and invoking reconfiguration. do_remount(), do_umount() and do_emergency_remount_callback() are switched to use the new process. [AV -- fold UMOUNT and EMERGENCY_REMOUNT in; fixes the umount / bug, gets rid of pointless complexity] [AV -- set ->net_ns in all cases; nfs remount will need that] [AV -- shift security_sb_remount() call into reconfigure_super(); the callers that didn't do security_sb_remount() have NULL fc->security anyway, so it's a no-op for them] Signed-off-by: David Howells Co-developed-by: Al Viro Signed-off-by: Al Viro --- fs/fs_context.c | 35 ++++++++++++++- fs/internal.h | 3 +- fs/namespace.c | 61 ++++++++++++++++---------- fs/super.c | 107 +++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 1 + include/linux/fs_context.h | 4 ++ 6 files changed, 152 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 857cd46a687b..5e2c3aba1dd8 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -69,6 +69,13 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, case FS_CONTEXT_FOR_MOUNT: fc->user_ns = get_user_ns(fc->cred->user_ns); break; + case FS_CONTEXT_FOR_RECONFIGURE: + /* We don't pin any namespaces as the superblock's + * subscriptions cannot be changed at this point. + */ + atomic_inc(&reference->d_sb->s_active); + fc->root = dget(reference); + break; } ret = legacy_init_fs_context(fc); @@ -90,6 +97,15 @@ struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, } EXPORT_SYMBOL(fs_context_for_mount); +struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask) +{ + return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags, + sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE); +} +EXPORT_SYMBOL(fs_context_for_reconfigure); + void fc_drop_locked(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; @@ -99,6 +115,7 @@ void fc_drop_locked(struct fs_context *fc) } static void legacy_fs_context_free(struct fs_context *fc); + /** * put_fs_context - Dispose of a superblock configuration context. * @fc: The context to dispose of. @@ -118,8 +135,7 @@ void put_fs_context(struct fs_context *fc) legacy_fs_context_free(fc); security_free_mnt_opts(&fc->security); - if (fc->net_ns) - put_net(fc->net_ns); + put_net(fc->net_ns); put_user_ns(fc->user_ns); put_cred(fc->cred); kfree(fc->subtype); @@ -172,6 +188,21 @@ int legacy_get_tree(struct fs_context *fc) return 0; } +/* + * Handle remount. + */ +int legacy_reconfigure(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; + + if (!sb->s_op->remount_fs) + return 0; + + return sb->s_op->remount_fs(sb, &fc->sb_flags, + ctx ? ctx->legacy_data : NULL); +} + /* * Initialise a legacy context for a filesystem that doesn't support * fs_context. diff --git a/fs/internal.h b/fs/internal.h index 6af26d897034..016a5b8dd305 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,6 +56,7 @@ extern void __init chrdev_init(void); * fs_context.c */ extern int legacy_get_tree(struct fs_context *fc); +extern int legacy_reconfigure(struct fs_context *fc); extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void fc_drop_locked(struct fs_context *); @@ -107,7 +108,7 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c */ -extern int do_remount_sb(struct super_block *, int, void *, int); +extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); extern struct super_block *user_get_super(dev_t); diff --git a/fs/namespace.c b/fs/namespace.c index 750500c6c33d..931228d8518a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1489,6 +1489,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) static void shrink_submounts(struct mount *mnt); +static int do_umount_root(struct super_block *sb) +{ + int ret = 0; + + down_write(&sb->s_umount); + if (!sb_rdonly(sb)) { + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, + SB_RDONLY); + if (IS_ERR(fc)) { + ret = PTR_ERR(fc); + } else { + ret = parse_monolithic_mount_data(fc, NULL); + if (!ret) + ret = reconfigure_super(fc); + put_fs_context(fc); + } + } + up_write(&sb->s_umount); + return ret; +} + static int do_umount(struct mount *mnt, int flags) { struct super_block *sb = mnt->mnt.mnt_sb; @@ -1554,11 +1577,7 @@ static int do_umount(struct mount *mnt, int flags) */ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; - down_write(&sb->s_umount); - if (!sb_rdonly(sb)) - retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); - up_write(&sb->s_umount); - return retval; + return do_umount_root(sb); } namespace_lock(); @@ -2367,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); - void *sec_opts = NULL; + struct fs_context *fc; if (!check_mnt(mnt)) return -EINVAL; @@ -2378,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; - if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { - err = security_sb_eat_lsm_opts(data, &sec_opts); - if (err) - return err; - } - err = security_sb_remount(sb, sec_opts); - security_free_mnt_opts(&sec_opts); - if (err) - return err; + fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); - down_write(&sb->s_umount); - err = -EPERM; - if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { - err = do_remount_sb(sb, sb_flags, data, 0); - if (!err) - set_mount_attributes(mnt, mnt_flags); + err = parse_monolithic_mount_data(fc, data); + if (!err) { + down_write(&sb->s_umount); + err = -EPERM; + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { + err = reconfigure_super(fc); + if (!err) + set_mount_attributes(mnt, mnt_flags); + } + up_write(&sb->s_umount); } - up_write(&sb->s_umount); + put_fs_context(fc); return err; } diff --git a/fs/super.c b/fs/super.c index 11e2a6cb3baf..50553233dd15 100644 --- a/fs/super.c +++ b/fs/super.c @@ -836,28 +836,35 @@ rescan: } /** - * do_remount_sb - asks filesystem to change mount options. - * @sb: superblock in question - * @sb_flags: revised superblock flags - * @data: the rest of options - * @force: whether or not to force the change + * reconfigure_super - asks filesystem to change superblock parameters + * @fc: The superblock and configuration * - * Alters the mount options of a mounted file system. + * Alters the configuration parameters of a live superblock. */ -int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) +int reconfigure_super(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; int retval; - int remount_ro; + bool remount_ro = false; + bool force = fc->sb_flags & SB_FORCE; + if (fc->sb_flags_mask & ~MS_RMT_MASK) + return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; + retval = security_sb_remount(sb, fc->security); + if (retval) + return retval; + + if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK - if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) - return -EACCES; + if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) + return -EACCES; #endif - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); + } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { @@ -868,13 +875,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); - /* If we are remounting RDONLY and current sb is read/write, - make sure there are no rw files opened */ + /* If we are reconfiguring to RDONLY and current sb is read/write, + * make sure there are no files open for writing. + */ if (remount_ro) { if (force) { sb->s_readonly_remount = 1; @@ -886,17 +894,17 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) } } - if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &sb_flags, data); - if (retval) { - if (!force) - goto cancel_readonly; - /* If forced remount, go ahead despite any errors */ - WARN(1, "forced remount of a %s fs returned %i\n", - sb->s_type->name, retval); - } + retval = legacy_reconfigure(fc); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); } - sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); + + WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | + (fc->sb_flags & fc->sb_flags_mask))); /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; @@ -923,10 +931,15 @@ static void do_emergency_remount_callback(struct super_block *sb) down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && !sb_rdonly(sb)) { - /* - * What lock protects sb->s_flags?? - */ - do_remount_sb(sb, SB_RDONLY, NULL, 1); + struct fs_context *fc; + + fc = fs_context_for_reconfigure(sb->s_root, + SB_RDONLY | SB_FORCE, SB_RDONLY); + if (!IS_ERR(fc)) { + if (parse_monolithic_mount_data(fc, NULL) == 0) + (void)reconfigure_super(fc); + put_fs_context(fc); + } } up_write(&sb->s_umount); } @@ -1213,6 +1226,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); +static int reconfigure_single(struct super_block *s, + int flags, void *data) +{ + struct fs_context *fc; + int ret; + + /* The caller really need to be passing fc down into mount_single(), + * then a chunk of this can be removed. [Bollocks -- AV] + * Better yet, reconfiguration shouldn't happen, but rather the second + * mount should be rejected if the parameters are not compatible. + */ + fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + ret = parse_monolithic_mount_data(fc, data); + if (ret < 0) + goto out; + + ret = reconfigure_super(fc); +out: + put_fs_context(fc); + return ret; +} + static int compare_single(struct super_block *s, void *p) { return 1; @@ -1230,13 +1268,14 @@ struct dentry *mount_single(struct file_system_type *fs_type, return ERR_CAST(s); if (!s->s_root) { error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - s->s_flags |= SB_ACTIVE; + if (!error) + s->s_flags |= SB_ACTIVE; } else { - do_remount_sb(s, flags, data, 0); + error = reconfigure_single(s, flags, data); + } + if (unlikely(error)) { + deactivate_locked_super(s); + return ERR_PTR(error); } return dget(s->s_root); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 36fff12ab890..c65d02c5c512 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1337,6 +1337,7 @@ extern int send_sigurg(struct fown_struct *fown); /* These sb flags are internal to the kernel */ #define SB_SUBMOUNT (1<<26) +#define SB_FORCE (1<<27) #define SB_NOSEC (1<<28) #define SB_BORN (1<<29) #define SB_ACTIVE (1<<30) diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 9805514444c9..98772f882a3e 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -25,6 +25,7 @@ struct user_namespace; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ + FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ }; /* @@ -57,6 +58,9 @@ struct fs_context { */ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, unsigned int sb_flags); +extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From e1a91586d5da6f879b6dd385a2e7227bf1653570 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Dec 2018 16:25:31 -0500 Subject: fs_context flavour for submounts This is an eventual replacement for vfs_submount() uses. Unlike the "mount" and "remount" cases, the users of that thing are not in VFS - they are buried in various ->d_automount() instances and rather than converting them all at once we introduce the (thankfully small and simple) infrastructure here and deal with the prospective users in afs, nfs, etc. parts of the series. Here we just introduce a new constructor (fs_context_for_submount()) along with the corresponding enum constant to be put into fc->purpose for those. Signed-off-by: Al Viro --- fs/fs_context.c | 10 ++++++++++ include/linux/fs_context.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 5e2c3aba1dd8..2bd652b6e848 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -69,6 +69,9 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, case FS_CONTEXT_FOR_MOUNT: fc->user_ns = get_user_ns(fc->cred->user_ns); break; + case FS_CONTEXT_FOR_SUBMOUNT: + fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); + break; case FS_CONTEXT_FOR_RECONFIGURE: /* We don't pin any namespaces as the superblock's * subscriptions cannot be changed at this point. @@ -106,6 +109,13 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, } EXPORT_SYMBOL(fs_context_for_reconfigure); +struct fs_context *fs_context_for_submount(struct file_system_type *type, + struct dentry *reference) +{ + return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT); +} +EXPORT_SYMBOL(fs_context_for_submount); + void fc_drop_locked(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 98772f882a3e..7feb018c7a9e 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -25,6 +25,7 @@ struct user_namespace; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ + FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */ FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ }; @@ -61,6 +62,8 @@ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, unsigned int sb_flags, unsigned int sb_flags_mask); +extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type, + struct dentry *reference); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From f3a09c92018a91ad0981146a4ac59414f814d801 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Dec 2018 18:55:56 -0500 Subject: introduce fs_context methods Signed-off-by: Al Viro --- fs/fs_context.c | 28 ++++++++++++++++++++++------ fs/internal.h | 2 -- fs/super.c | 36 ++++++++++++++++++++++++++++-------- include/linux/fs.h | 2 ++ include/linux/fs_context.h | 13 +++++++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 2bd652b6e848..825d1b2c8807 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -51,6 +51,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, unsigned int sb_flags_mask, enum fs_context_purpose purpose) { + int (*init_fs_context)(struct fs_context *); struct fs_context *fc; int ret = -ENOMEM; @@ -81,7 +82,12 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, break; } - ret = legacy_init_fs_context(fc); + /* TODO: Make all filesystems support this unconditionally */ + init_fs_context = fc->fs_type->init_fs_context; + if (!init_fs_context) + init_fs_context = legacy_init_fs_context; + + ret = init_fs_context(fc); if (ret < 0) goto err_fc; fc->need_free = true; @@ -141,8 +147,8 @@ void put_fs_context(struct fs_context *fc) deactivate_super(sb); } - if (fc->need_free) - legacy_fs_context_free(fc); + if (fc->need_free && fc->ops && fc->ops->free) + fc->ops->free(fc); security_free_mnt_opts(&fc->security); put_net(fc->net_ns); @@ -180,7 +186,7 @@ static int legacy_parse_monolithic(struct fs_context *fc, void *data) /* * Get a mountable root with the legacy mount command. */ -int legacy_get_tree(struct fs_context *fc) +static int legacy_get_tree(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb; @@ -201,7 +207,7 @@ int legacy_get_tree(struct fs_context *fc) /* * Handle remount. */ -int legacy_reconfigure(struct fs_context *fc) +static int legacy_reconfigure(struct fs_context *fc) { struct legacy_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; @@ -213,6 +219,13 @@ int legacy_reconfigure(struct fs_context *fc) ctx ? ctx->legacy_data : NULL); } +const struct fs_context_operations legacy_fs_context_ops = { + .free = legacy_fs_context_free, + .parse_monolithic = legacy_parse_monolithic, + .get_tree = legacy_get_tree, + .reconfigure = legacy_reconfigure, +}; + /* * Initialise a legacy context for a filesystem that doesn't support * fs_context. @@ -222,10 +235,13 @@ static int legacy_init_fs_context(struct fs_context *fc) fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); if (!fc->fs_private) return -ENOMEM; + fc->ops = &legacy_fs_context_ops; return 0; } int parse_monolithic_mount_data(struct fs_context *fc, void *data) { - return legacy_parse_monolithic(fc, data); + int (*monolithic_mount_data)(struct fs_context *, void *); + monolithic_mount_data = fc->ops->parse_monolithic; + return monolithic_mount_data(fc, data); } diff --git a/fs/internal.h b/fs/internal.h index 016a5b8dd305..8f8d07cc433f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -55,8 +55,6 @@ extern void __init chrdev_init(void); /* * fs_context.c */ -extern int legacy_get_tree(struct fs_context *fc); -extern int legacy_reconfigure(struct fs_context *fc); extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void fc_drop_locked(struct fs_context *); diff --git a/fs/super.c b/fs/super.c index 50553233dd15..76b3181c782d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -894,13 +894,15 @@ int reconfigure_super(struct fs_context *fc) } } - retval = legacy_reconfigure(fc); - if (retval) { - if (!force) - goto cancel_readonly; - /* If forced remount, go ahead despite any errors */ - WARN(1, "forced remount of a %s fs returned %i\n", - sb->s_type->name, retval); + if (fc->ops->reconfigure) { + retval = fc->ops->reconfigure(fc); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); + } } WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | @@ -1294,10 +1296,28 @@ int vfs_get_tree(struct fs_context *fc) struct super_block *sb; int error; - error = legacy_get_tree(fc); + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) + return -ENOENT; + + if (fc->root) + return -EBUSY; + + /* Get the mountable root in fc->root, with a ref on the root and a ref + * on the superblock. + */ + error = fc->ops->get_tree(fc); if (error < 0) return error; + if (!fc->root) { + pr_err("Filesystem %s get_tree() didn't set fc->root\n", + fc->fs_type->name); + /* We don't know what the locking state of the superblock is - + * if there is a superblock. + */ + BUG(); + } + sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); diff --git a/include/linux/fs.h b/include/linux/fs.h index c65d02c5c512..8d578a9e1e8c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -61,6 +61,7 @@ struct workqueue_struct; struct iov_iter; struct fscrypt_info; struct fscrypt_operations; +struct fs_context; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2173,6 +2174,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ + int (*init_fs_context)(struct fs_context *); struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7feb018c7a9e..087c12954360 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -20,8 +20,13 @@ struct cred; struct dentry; struct file_operations; struct file_system_type; +struct mnt_namespace; struct net; +struct pid_namespace; +struct super_block; struct user_namespace; +struct vfsmount; +struct path; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ @@ -39,6 +44,7 @@ enum fs_context_purpose { * See Documentation/filesystems/mounting.txt */ struct fs_context { + const struct fs_context_operations *ops; struct file_system_type *fs_type; void *fs_private; /* The filesystem's context */ struct dentry *root; /* The root and superblock */ @@ -54,6 +60,13 @@ struct fs_context { bool need_free:1; /* Need to call ops->free() */ }; +struct fs_context_operations { + void (*free)(struct fs_context *fc); + int (*parse_monolithic)(struct fs_context *fc, void *data); + int (*get_tree)(struct fs_context *fc); + int (*reconfigure)(struct fs_context *fc); +}; + /* * fs_context manipulation functions. */ -- cgit v1.2.3 From c6b82263f9c6e745eb4c5dfc2578d147c4cd7604 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:23 +0000 Subject: vfs: Introduce logging functions Introduce a set of logging functions through which informational messages, warnings and error messages incurred by the mount procedure can be logged and, in a future patch, passed to userspace instead by way of the filesystem configuration context file descriptor. There are four functions: (1) infof(const char *fmt, ...); Logs an informational message. (2) warnf(const char *fmt, ...); Logs a warning message. (3) errorf(const char *fmt, ...); Logs an error message. (4) invalf(const char *fmt, ...); As errof(), but returns -EINVAL so can be used on a return statement. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs_context.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 087c12954360..d208cc40b868 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -81,4 +81,46 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); +#define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__) + +/** + * infof - Store supplementary informational message + * @fc: The context in which to log the informational message + * @fmt: The format string + * + * Store the supplementary informational message for the process if the process + * has enabled the facility. + */ +#define infof(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * warnf - Store supplementary warning message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary warning message for the process if the process has + * enabled the facility. + */ +#define warnf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * errorf - Store supplementary error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility. + */ +#define errorf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); }) + +/** + * invalf - Store supplementary invalid argument error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility and return -EINVAL. + */ +#define invalf(fc, fmt, ...) ({ errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; }) + #endif /* _LINUX_FS_CONTEXT_H */ -- cgit v1.2.3 From b7bb367afa4bf9de60830683305c63030c3e581d Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 30 Jan 2019 09:40:04 +0100 Subject: spi: support inter-word delay requirement for devices Some devices are slow and cannot keep up with the SPI bus and therefore require a short delay between words of the SPI transfer. The example of this that I'm looking at is a SAMA5D2 with a minimum SPI clock of 400kHz talking to an AVR-based SPI slave. The AVR cannot put bytes on the bus fast enough to keep up with the SoC's SPI controller even at the lowest bus speed. This patch introduces the ability to specify a required inter-word delay for SPI devices. It is up to the controller driver to configure itself accordingly in order to introduce the requested delay. Note that, for spi_transfer, there is already a field word_delay that provides similar functionality. This field, however, is specified in clock cycles (and worse, SPI controller cycles, not SCK cycles); that makes this value dependent on the master clock instead of the device clock for which the delay is intended to provide some relief. This patch leaves this old word_delay in place and provides a time-based word_delay_us alongside it; the new field fits in the struct padding so struct size is constant. There is only one in-kernel user of the word_delay field and presumably that driver could be reworked to use the time-based value instead. The time-based delay is limited to 8 bits as these delays are intended to be short. The SAMA5D2 that I've tested this on limits delays to a maximum of ~100us, which is already many word-transfer periods even at the minimum transfer speed supported by the controller. Signed-off-by: Jonas Bonn CC: Mark Brown CC: Rob Herring CC: Mark Rutland CC: linux-spi@vger.kernel.org CC: devicetree@vger.kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 5 +++++ include/linux/spi/spi.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 0e0f2c62973c..2f7176f07591 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3050,6 +3050,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) * it is not set for this transfer. * Set transfer tx_nbits and rx_nbits as single transfer default * (SPI_NBITS_SINGLE) if it is not set for this transfer. + * Ensure transfer word_delay is at least as long as that required by + * device itself. */ message->frame_length = 0; list_for_each_entry(xfer, &message->transfers, transfer_list) { @@ -3120,6 +3122,9 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) !(spi->mode & SPI_RX_QUAD)) return -EINVAL; } + + if (xfer->word_delay_usecs < spi->word_delay_usecs) + xfer->word_delay_usecs = spi->word_delay_usecs; } message->status = -EINPROGRESS; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 916bba47d156..662b336aa2e4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -122,6 +122,8 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * the spi_master. * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) + * @word_delay_usecs: microsecond delay to be inserted between consecutive + * words of a transfer * * @statistics: statistics for the spi_device * @@ -169,6 +171,7 @@ struct spi_device { const char *driver_override; int cs_gpio; /* LEGACY: chip select gpio */ struct gpio_desc *cs_gpiod; /* chip select gpio desc */ + uint8_t word_delay_usecs; /* inter-word delay */ /* the statistics */ struct spi_statistics statistics; @@ -721,6 +724,8 @@ extern void spi_res_release(struct spi_controller *ctlr, * @delay_usecs: microseconds to delay after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. + * @word_delay_usecs: microseconds to inter word delay after each word size + * (set by bits_per_word) transmission. * @word_delay: clock cycles to inter word delay after each word size * (set by bits_per_word) transmission. * @transfer_list: transfers are sequenced through @spi_message.transfers @@ -803,6 +808,7 @@ struct spi_transfer { #define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ #define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ u8 bits_per_word; + u8 word_delay_usecs; u16 delay_usecs; u32 speed_hz; u16 word_delay; -- cgit v1.2.3 From 57d4657716aca81ef4d7ec23e8123d26e3d28954 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 23 Jan 2019 13:35:00 -0500 Subject: audit: ignore fcaps on umount Don't fetch fcaps when umount2 is called to avoid a process hang while it waits for the missing resource to (possibly never) re-appear. Note the comment above user_path_mountpoint_at(): * A umount is a special case for path walking. We're not actually interested * in the inode in this situation, and ESTALE errors can be a problem. We * simply want track down the dentry and vfsmount attached at the mountpoint * and avoid revalidating the last component. This can happen on ceph, cifs, 9p, lustre, fuse (gluster) or NFS. Please see the github issue tracker https://github.com/linux-audit/audit-kernel/issues/100 Signed-off-by: Richard Guy Briggs [PM: merge fuzz in audit_log_fcaps()] Signed-off-by: Paul Moore --- fs/namei.c | 2 +- fs/namespace.c | 2 ++ include/linux/audit.h | 15 ++++++++++----- include/linux/namei.h | 3 +++ kernel/audit.c | 10 +++++++++- kernel/audit.h | 2 +- kernel/auditsc.c | 6 +++--- 7 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 914178cdbe94..87d7710a2e1d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2720,7 +2720,7 @@ filename_mountpoint(int dfd, struct filename *name, struct path *path, if (unlikely(error == -ESTALE)) error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path); if (likely(!error)) - audit_inode(name, path->dentry, 0); + audit_inode(name, path->dentry, flags & LOOKUP_NO_EVAL); restore_nameidata(); putname(name); return error; diff --git a/fs/namespace.c b/fs/namespace.c index a677b59efd74..e5de0e372df2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1640,6 +1640,8 @@ int ksys_umount(char __user *name, int flags) if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + lookup_flags |= LOOKUP_NO_EVAL; + retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; diff --git a/include/linux/audit.h b/include/linux/audit.h index ecb5d317d6a2..29251b18331a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -25,6 +25,7 @@ #include #include +#include /* LOOKUP_* */ #include #define AUDIT_INO_UNSET ((unsigned long)-1) @@ -248,6 +249,7 @@ extern void __audit_getname(struct filename *name); #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ +#define AUDIT_INODE_NOEVAL 4 /* audit record incomplete */ extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -308,12 +310,15 @@ static inline void audit_getname(struct filename *name) } static inline void audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent) { + unsigned int flags) { if (unlikely(!audit_dummy_context())) { - unsigned int flags = 0; - if (parent) - flags |= AUDIT_INODE_PARENT; - __audit_inode(name, dentry, flags); + unsigned int aflags = 0; + + if (flags & LOOKUP_PARENT) + aflags |= AUDIT_INODE_PARENT; + if (flags & LOOKUP_NO_EVAL) + aflags |= AUDIT_INODE_NOEVAL; + __audit_inode(name, dentry, aflags); } } static inline void audit_file(struct file *file) diff --git a/include/linux/namei.h b/include/linux/namei.h index a78606e8e3df..9138b4471dbf 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -24,6 +24,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - internal "there are more path components" flag * - dentry cache is untrusted; force a real lookup * - suppress terminal automount + * - skip revalidation + * - don't fetch xattrs on audit_inode */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 @@ -33,6 +35,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 #define LOOKUP_NO_REVAL 0x0080 +#define LOOKUP_NO_EVAL 0x0100 /* * Intent data diff --git a/kernel/audit.c b/kernel/audit.c index 3f3f1888cac7..b7177a8def2e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2082,6 +2082,10 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) { + if (name->fcap_ver == -1) { + audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); + return; + } audit_log_cap(ab, "cap_fp", &name->fcap.permitted); audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", @@ -2114,7 +2118,7 @@ static inline int audit_copy_fcaps(struct audit_names *name, /* Copy inode data into an audit_names. */ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode) + struct inode *inode, unsigned int flags) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; @@ -2123,6 +2127,10 @@ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, name->gid = inode->i_gid; name->rdev = inode->i_rdev; security_inode_getsecid(inode, &name->osid); + if (flags & AUDIT_INODE_NOEVAL) { + name->fcap_ver = -1; + return; + } audit_copy_fcaps(name, dentry); } diff --git a/kernel/audit.h b/kernel/audit.h index 9acb8691ed87..002f0f7ba732 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -215,7 +215,7 @@ extern void audit_log_session_info(struct audit_buffer *ab); extern void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode); + struct inode *inode, unsigned int flags); extern void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap); extern void audit_log_name(struct audit_context *context, diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a2696ce790f9..68da71001096 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1856,7 +1856,7 @@ out: n->type = AUDIT_TYPE_NORMAL; } handle_path(dentry); - audit_copy_inode(n, dentry, inode); + audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL); } void __audit_file(const struct file *file) @@ -1955,7 +1955,7 @@ void __audit_inode_child(struct inode *parent, n = audit_alloc_name(context, AUDIT_TYPE_PARENT); if (!n) return; - audit_copy_inode(n, NULL, parent); + audit_copy_inode(n, NULL, parent, 0); } if (!found_child) { @@ -1974,7 +1974,7 @@ void __audit_inode_child(struct inode *parent, } if (inode) - audit_copy_inode(found_child, dentry, inode); + audit_copy_inode(found_child, dentry, inode, 0); else found_child->ino = AUDIT_INO_UNSET; } -- cgit v1.2.3 From befa618112a0a4590ce21d70aa35c9d341337774 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 09:21:19 -0800 Subject: bpf: BPF_PROG_TYPE_CGROUP_{SKB, SOCK, SOCK_ADDR} require cgroups enabled There is no way to exercise appropriate attach points without cgroups enabled. This lets test_verifier correctly skip tests for these prog_types if kernel was compiled without BPF cgroup support. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 44d9ab4809bd..08bf2f1fe553 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -6,9 +6,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +#ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +#endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) -- cgit v1.2.3 From 116bfa96a255123ed209da6544f74a4f2eaca5da Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 29 Jan 2019 01:04:25 -0500 Subject: bpf: fix missing prototype warnings Compiling with W=1 generates warnings: CC kernel/bpf/core.o kernel/bpf/core.c:721:12: warning: no previous prototype for ?bpf_jit_alloc_exec_limit? [-Wmissing-prototypes] 721 | u64 __weak bpf_jit_alloc_exec_limit(void) | ^~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/core.c:757:14: warning: no previous prototype for ?bpf_jit_alloc_exec? [-Wmissing-prototypes] 757 | void *__weak bpf_jit_alloc_exec(unsigned long size) | ^~~~~~~~~~~~~~~~~~ kernel/bpf/core.c:762:13: warning: no previous prototype for ?bpf_jit_free_exec? [-Wmissing-prototypes] 762 | void __weak bpf_jit_free_exec(void *addr) | ^~~~~~~~~~~~~~~~~ All three are weak functions that archs can override, provide proper prototypes for when a new arch provides their own. Signed-off-by: Valdis Kletnieks Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index e4b473f85b46..7317376734f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -880,7 +880,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); - +u64 bpf_jit_alloc_exec_limit(void); +void *bpf_jit_alloc_exec(unsigned long size); +void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); int bpf_jit_get_func_addr(const struct bpf_prog *prog, -- cgit v1.2.3 From 1832f4ef5867fd3898d8a6c6c1978b75d76fc246 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 29 Jan 2019 01:47:06 -0500 Subject: bpf, cgroups: clean up kerneldoc warnings Building with W=1 reveals some bitrot: CC kernel/bpf/cgroup.o kernel/bpf/cgroup.c:238: warning: Function parameter or member 'flags' not described in '__cgroup_bpf_attach' kernel/bpf/cgroup.c:367: warning: Function parameter or member 'unused_flags' not described in '__cgroup_bpf_detach' Add a kerneldoc line for 'flags'. Fixing the warning for 'unused_flags' is best approached by removing the unused parameter on the function call. Signed-off-by: Valdis Kletnieks Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 3 ++- kernel/cgroup/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 588dd5f0bd85..695b2a880d9a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -78,7 +78,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d78cfec5807d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -230,6 +230,7 @@ cleanup: * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach * @type: Type of attach operation + * @flags: Option flags * * Must be called with cgroup_mutex held. */ @@ -363,7 +364,7 @@ cleanup: * Must be called with cgroup_mutex held. */ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 unused_flags) + enum bpf_attach_type type) { struct list_head *progs = &cgrp->bpf.progs[type]; enum bpf_cgroup_storage_type stype; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..9f617605dacb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5996,7 +5996,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, type, flags); + ret = __cgroup_bpf_detach(cgrp, prog, type); mutex_unlock(&cgroup_mutex); return ret; } -- cgit v1.2.3 From a08c2a5a31941131c41feaa0429e4c8854cf48f2 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Wed, 23 Jan 2019 08:50:14 +0100 Subject: PM-runtime: Replace jiffies-based accounting with ktime-based accounting Replace jiffies-based accounting for runtime_active_time and runtime_suspended_time with ktime-based accounting. This makes the runtime debug counters inline with genpd and other PM subsytems which use ktime-based accounting. Timekeeping is initialized before driver_init(). It's only at that time that PM-runtime can be enabled. Signed-off-by: Thara Gopinath [switch from ktime to raw nsec] Signed-off-by: Vincent Guittot Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 17 +++++++++-------- drivers/base/power/sysfs.c | 11 ++++++++--- include/linux/pm.h | 6 +++--- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index f23b7ecfce3b..eb1a3b878e1e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -66,8 +66,8 @@ static int rpm_suspend(struct device *dev, int rpmflags); */ void update_pm_runtime_accounting(struct device *dev) { - unsigned long now = jiffies; - unsigned long delta; + u64 now = ktime_to_ns(ktime_get()); + u64 delta; delta = now - dev->power.accounting_timestamp; @@ -77,9 +77,9 @@ void update_pm_runtime_accounting(struct device *dev) return; if (dev->power.runtime_status == RPM_SUSPENDED) - dev->power.suspended_jiffies += delta; + dev->power.suspended_time += delta; else - dev->power.active_jiffies += delta; + dev->power.active_time += delta; } static void __update_runtime_status(struct device *dev, enum rpm_status status) @@ -90,16 +90,17 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status) u64 pm_runtime_suspended_time(struct device *dev) { - unsigned long flags, time; + u64 time; + unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); update_pm_runtime_accounting(dev); - time = dev->power.suspended_jiffies; + time = dev->power.suspended_time; spin_unlock_irqrestore(&dev->power.lock, flags); - return jiffies_to_nsecs(time); + return time; } EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); @@ -1314,7 +1315,7 @@ void pm_runtime_enable(struct device *dev) /* About to enable runtime pm, set accounting_timestamp to now */ if (!dev->power.disable_depth) - dev->power.accounting_timestamp = jiffies; + dev->power.accounting_timestamp = ktime_to_ns(ktime_get()); } else { dev_warn(dev, "Unbalanced %s!\n", __func__); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d713738ce796..96c8a227610a 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -125,9 +125,12 @@ static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; + u64 tmp; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies)); + tmp = dev->power.active_time; + do_div(tmp, NSEC_PER_MSEC); + ret = sprintf(buf, "%llu\n", tmp); spin_unlock_irq(&dev->power.lock); return ret; } @@ -138,10 +141,12 @@ static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; + u64 tmp; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sprintf(buf, "%i\n", - jiffies_to_msecs(dev->power.suspended_jiffies)); + tmp = dev->power.suspended_time; + do_div(tmp, NSEC_PER_MSEC); + ret = sprintf(buf, "%llu\n", tmp); spin_unlock_irq(&dev->power.lock); return ret; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 0bd9de116826..3d2cbf947768 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -633,9 +633,9 @@ struct dev_pm_info { int runtime_error; int autosuspend_delay; u64 last_busy; - unsigned long active_jiffies; - unsigned long suspended_jiffies; - unsigned long accounting_timestamp; + u64 active_time; + u64 suspended_time; + u64 accounting_timestamp; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ void (*set_latency_tolerance)(struct device *, s32); -- cgit v1.2.3 From 8204e0c1113d6b7f599bcd7ebfbfde72e76c102f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 Jan 2019 10:39:26 -0800 Subject: workqueue: Provide queue_work_node to queue work near a given NUMA node Provide a new function, queue_work_node, which is meant to schedule work on a "random" CPU of the requested NUMA node. The main motivation for this is to help assist asynchronous init to better improve boot times for devices that are local to a specific node. For now we just default to the first CPU that is in the intersection of the cpumask of the node and the online cpumask. The only exception is if the CPU is local to the node we will just use the current CPU. This should work for our purposes as we are currently only using this for unbound work so the CPU will be translated to a node anyway instead of being directly used. As we are only using the first CPU to represent the NUMA node for now I am limiting the scope of the function so that it can only be used with unbound workqueues. Acked-by: Tejun Heo Reviewed-by: Bart Van Assche Acked-by: Dan Williams Signed-off-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 60d673e15632..1f50c1e586e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); +extern bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..d5a26e456f7a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL(queue_work_on); +/** + * workqueue_select_cpu_near - Select a CPU based on NUMA node + * @node: NUMA node ID that we want to select a CPU from + * + * This function will attempt to find a "random" cpu available on a given + * node. If there are no CPUs available on the given node it will return + * WORK_CPU_UNBOUND indicating that we should just schedule to any + * available CPU if we need to schedule this work. + */ +static int workqueue_select_cpu_near(int node) +{ + int cpu; + + /* No point in doing this if NUMA isn't enabled for workqueues */ + if (!wq_numa_enabled) + return WORK_CPU_UNBOUND; + + /* Delay binding to CPU if node is not valid or online */ + if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) + return WORK_CPU_UNBOUND; + + /* Use local node/cpu if we are already there */ + cpu = raw_smp_processor_id(); + if (node == cpu_to_node(cpu)) + return cpu; + + /* Use "random" otherwise know as "first" online CPU of node */ + cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); + + /* If CPU is valid return that, otherwise just defer */ + return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND; +} + +/** + * queue_work_node - queue work on a "random" cpu for a given NUMA node + * @node: NUMA node that we are targeting the work for + * @wq: workqueue to use + * @work: work to queue + * + * We queue the work to a "random" CPU within a given NUMA node. The basic + * idea here is to provide a way to somehow associate work with a given + * NUMA node. + * + * This function will only make a best effort attempt at getting this onto + * the right NUMA node. If no node is requested or the requested node is + * offline then we just fall back to standard queue_work behavior. + * + * Currently the "random" CPU ends up being the first available CPU in the + * intersection of cpu_online_mask and the cpumask of the node, unless we + * are running on the node. In that case we just use the current CPU. + * + * Return: %false if @work was already on a queue, %true otherwise. + */ +bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work) +{ + unsigned long flags; + bool ret = false; + + /* + * This current implementation is specific to unbound workqueues. + * Specifically we only return the first available CPU for a given + * node instead of cycling through individual CPUs within the node. + * + * If this is used with a per-cpu workqueue then the logic in + * workqueue_select_cpu_near would need to be updated to allow for + * some round robin type logic. + */ + WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); + + local_irq_save(flags); + + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + int cpu = workqueue_select_cpu_near(node); + + __queue_work(cpu, wq, work); + ret = true; + } + + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(queue_work_node); + void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); -- cgit v1.2.3 From 6be9238e5cb64741ff95c3ae440b112753ad93de Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 Jan 2019 10:39:31 -0800 Subject: async: Add support for queueing on specific NUMA node Introduce four new variants of the async_schedule_ functions that allow scheduling on a specific NUMA node. The first two functions are async_schedule_near and async_schedule_near_domain end up mapping to async_schedule and async_schedule_domain, but provide NUMA node specific functionality. They replace the original functions which were moved to inline function definitions that call the new functions while passing NUMA_NO_NODE. The second two functions are async_schedule_dev and async_schedule_dev_domain which provide NUMA specific functionality when passing a device as the data member and that device has a NUMA node other than NUMA_NO_NODE. The main motivation behind this is to address the need to be able to schedule device specific init work on specific NUMA nodes in order to improve performance of memory initialization. I have seen a significant improvement in initialziation time for persistent memory as a result of this approach. In the case of 3TB of memory on a single node the initialization time in the worst case went from 36s down to about 26s for a 10s improvement. As such the data shows a general benefit for affinitizing the async work to the node local to the device. Reviewed-by: Bart Van Assche Reviewed-by: Dan Williams Signed-off-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- include/linux/async.h | 82 +++++++++++++++++++++++++++++++++++++++++++++++++-- kernel/async.c | 53 ++++++++++++++++++--------------- 2 files changed, 108 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h index 6b0226bdaadc..f81d6dbffe68 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -14,6 +14,8 @@ #include #include +#include +#include typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); @@ -37,9 +39,83 @@ struct async_domain { struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } -extern async_cookie_t async_schedule(async_func_t func, void *data); -extern async_cookie_t async_schedule_domain(async_func_t func, void *data, - struct async_domain *domain); +async_cookie_t async_schedule_node(async_func_t func, void *data, + int node); +async_cookie_t async_schedule_node_domain(async_func_t func, void *data, + int node, + struct async_domain *domain); + +/** + * async_schedule - schedule a function for asynchronous execution + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * + * Returns an async_cookie_t that may be used for checkpointing later. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t async_schedule(async_func_t func, void *data) +{ + return async_schedule_node(func, data, NUMA_NO_NODE); +} + +/** + * async_schedule_domain - schedule a function for asynchronous execution within a certain domain + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_domain(async_func_t func, void *data, + struct async_domain *domain) +{ + return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); +} + +/** + * async_schedule_dev - A device specific version of async_schedule + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. By doing this we can try to + * provide for the best possible outcome by operating on the device on the + * CPUs closest to the device. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_dev(async_func_t func, struct device *dev) +{ + return async_schedule_node(func, dev, dev_to_node(dev)); +} + +/** + * async_schedule_dev_domain - A device specific version of async_schedule_domain + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. By doing this we can try to + * provide for the best possible outcome by operating on the device on the + * CPUs closest to the device. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * Note: This function may be called from atomic or non-atomic contexts. + */ +static inline async_cookie_t +async_schedule_dev_domain(async_func_t func, struct device *dev, + struct async_domain *domain) +{ + return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); +} + void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); diff --git a/kernel/async.c b/kernel/async.c index a893d6170944..f6bd0d9885e1 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -149,7 +149,25 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } -static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain) +/** + * async_schedule_node_domain - NUMA specific version of async_schedule_domain + * @func: function to execute asynchronously + * @data: data pointer to pass to the function + * @node: NUMA node that we want to schedule this on or close to + * @domain: the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. + * + * Note: This function may be called from atomic or non-atomic contexts. + * + * The node requested will be honored on a best effort basis. If the node + * has no CPUs associated with it then the work is distributed among all + * available CPUs. + */ +async_cookie_t async_schedule_node_domain(async_func_t func, void *data, + int node, struct async_domain *domain) { struct async_entry *entry; unsigned long flags; @@ -195,43 +213,30 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy current->flags |= PF_USED_ASYNC; /* schedule for execution */ - queue_work(system_unbound_wq, &entry->work); + queue_work_node(node, system_unbound_wq, &entry->work); return newcookie; } +EXPORT_SYMBOL_GPL(async_schedule_node_domain); /** - * async_schedule - schedule a function for asynchronous execution + * async_schedule_node - NUMA specific version of async_schedule * @func: function to execute asynchronously * @data: data pointer to pass to the function + * @node: NUMA node that we want to schedule this on or close to * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. - */ -async_cookie_t async_schedule(async_func_t func, void *data) -{ - return __async_schedule(func, data, &async_dfl_domain); -} -EXPORT_SYMBOL_GPL(async_schedule); - -/** - * async_schedule_domain - schedule a function for asynchronous execution within a certain domain - * @func: function to execute asynchronously - * @data: data pointer to pass to the function - * @domain: the domain * - * Returns an async_cookie_t that may be used for checkpointing later. - * @domain may be used in the async_synchronize_*_domain() functions to - * wait within a certain synchronization domain rather than globally. A - * synchronization domain is specified via @domain. Note: This function - * may be called from atomic or non-atomic contexts. + * The node requested will be honored on a best effort basis. If the node + * has no CPUs associated with it then the work is distributed among all + * available CPUs. */ -async_cookie_t async_schedule_domain(async_func_t func, void *data, - struct async_domain *domain) +async_cookie_t async_schedule_node(async_func_t func, void *data, int node) { - return __async_schedule(func, data, domain); + return async_schedule_node_domain(func, data, node, &async_dfl_domain); } -EXPORT_SYMBOL_GPL(async_schedule_domain); +EXPORT_SYMBOL_GPL(async_schedule_node); /** * async_synchronize_full - synchronize all asynchronous function calls -- cgit v1.2.3 From 51bee5abeab2058ea5813c5615d6197a23dbf041 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Jan 2019 17:00:13 +0100 Subject: cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release() to fix the accounting The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which needs pids_free() to uncharge the pid. However, ->free() is called from __put_task_struct()->cgroup_free() and this is too late. Even the trivial program which does for (;;) { int pid = fork(); assert(pid >= 0); if (pid) wait(NULL); else exit(0); } can run out of limits because release_task()->call_rcu(delayed_put_task_struct) implies an RCU gp after the task/pid goes away and before the final put(). Test-case: mkdir -p /tmp/CG mount -t cgroup2 none /tmp/CG echo '+pids' > /tmp/CG/cgroup.subtree_control mkdir /tmp/CG/PID echo 2 > /tmp/CG/PID/pids.max perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' & echo $! > /tmp/CG/PID/cgroup.procs Without this patch the forking process fails soon after migration. Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite into the new helper, cgroup_release(), called by release_task() which actually frees the pid(s). Reported-by: Herton R. Krzesinski Reported-by: Jan Stancek Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- include/linux/cgroup.h | 2 ++ kernel/cgroup/cgroup.c | 15 +++++++++------ kernel/cgroup/pids.c | 4 ++-- kernel/exit.c | 1 + 5 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8fcbae1b8db0..120d1d40704b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -602,7 +602,7 @@ struct cgroup_subsys { void (*cancel_fork)(struct task_struct *task); void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); - void (*free)(struct task_struct *task); + void (*release)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); bool early_init:1; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9968332cceed..81f58b4a5418 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -121,6 +121,7 @@ extern int cgroup_can_fork(struct task_struct *p); extern void cgroup_cancel_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p); void cgroup_exit(struct task_struct *p); +void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); int cgroup_init_early(void); @@ -697,6 +698,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; } static inline void cgroup_cancel_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} +static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..f4418371c83b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1; */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; -static u16 have_free_callback __read_mostly; +static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ @@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; - have_free_callback |= (bool)ss->free << ss->id; + have_release_callback |= (bool)ss->release << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been @@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk) } while_each_subsys_mask(); } -void cgroup_free(struct task_struct *task) +void cgroup_release(struct task_struct *task) { - struct css_set *cset = task_css_set(task); struct cgroup_subsys *ss; int ssid; - do_each_subsys_mask(ss, ssid, have_free_callback) { - ss->free(task); + do_each_subsys_mask(ss, ssid, have_release_callback) { + ss->release(task); } while_each_subsys_mask(); +} +void cgroup_free(struct task_struct *task) +{ + struct css_set *cset = task_css_set(task); put_css_set(cset); } diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 9829c67ebc0a..c9960baaa14f 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task) pids_uncharge(pids, 1); } -static void pids_free(struct task_struct *task) +static void pids_release(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); @@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = { .cancel_attach = pids_cancel_attach, .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, - .free = pids_free, + .release = pids_release, .legacy_cftypes = pids_files, .dfl_cftypes = pids_files, .threaded = true, diff --git a/kernel/exit.c b/kernel/exit.c index 3fb7be001964..c2b8443f30b4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -219,6 +219,7 @@ repeat: } write_unlock_irq(&tasklist_lock); + cgroup_release(p); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); -- cgit v1.2.3 From 90462a5bd30c6ed91c6758e59537d047d7878ff9 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 31 Jan 2019 11:52:11 -0500 Subject: audit: remove unused actx param from audit_rule_match The audit_rule_match() struct audit_context *actx parameter is not used by any in-tree consumers (selinux, apparmour, integrity, smack). The audit context is an internal audit structure that should only be accessed by audit accessor functions. It was part of commit 03d37d25e0f9 ("LSM/Audit: Introduce generic Audit LSM hooks") but appears to have never been used. Remove it. Please see the github issue https://github.com/linux-audit/audit-kernel/issues/107 Signed-off-by: Richard Guy Briggs [PM: fixed the referenced commit title] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 4 +--- include/linux/security.h | 5 ++--- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 21 ++++++++++++--------- security/apparmor/audit.c | 3 +-- security/apparmor/include/audit.h | 3 +-- security/integrity/ima/ima.h | 3 +-- security/integrity/ima/ima_policy.c | 6 ++---- security/security.c | 6 ++---- security/selinux/include/audit.h | 4 +--- security/selinux/ss/services.c | 3 +-- security/smack/smack_lsm.c | 4 +--- 12 files changed, 26 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9a0bdf91e646..d0b5c7a05832 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1344,7 +1344,6 @@ * @field contains the field which relates to current LSM. * @op contains the operator that will be used for matching. * @rule points to the audit rule that will be checked against. - * @actx points to the audit context associated with the check. * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. * * @audit_rule_free: @@ -1764,8 +1763,7 @@ union security_list_options { int (*audit_rule_init)(u32 field, u32 op, char *rulestr, void **lsmrule); int (*audit_rule_known)(struct audit_krule *krule); - int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); + int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ diff --git a/include/linux/security.h b/include/linux/security.h index dbfb5a66babb..e8febec62ffb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1674,8 +1674,7 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) #ifdef CONFIG_SECURITY int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); int security_audit_rule_known(struct audit_krule *krule); -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); +int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); void security_audit_rule_free(void *lsmrule); #else @@ -1692,7 +1691,7 @@ static inline int security_audit_rule_known(struct audit_krule *krule) } static inline int security_audit_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule, struct audit_context *actx) + void *lsmrule) { return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 26a80a9d43a9..add360b46b38 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1355,7 +1355,7 @@ int audit_filter(int msgtype, unsigned int listtype) if (f->lsm_rule) { security_task_getsecid(current, &sid); result = security_audit_rule_match(sid, - f->type, f->op, f->lsm_rule, NULL); + f->type, f->op, f->lsm_rule); } break; case AUDIT_EXE: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 68da71001096..7d37cb1e4aef 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -631,9 +631,8 @@ static int audit_filter_rules(struct task_struct *tsk, need_sid = 0; } result = security_audit_rule_match(sid, f->type, - f->op, - f->lsm_rule, - ctx); + f->op, + f->lsm_rule); } break; case AUDIT_OBJ_USER: @@ -647,13 +646,17 @@ static int audit_filter_rules(struct task_struct *tsk, /* Find files that match */ if (name) { result = security_audit_rule_match( - name->osid, f->type, f->op, - f->lsm_rule, ctx); + name->osid, + f->type, + f->op, + f->lsm_rule); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { - if (security_audit_rule_match(n->osid, f->type, - f->op, f->lsm_rule, - ctx)) { + if (security_audit_rule_match( + n->osid, + f->type, + f->op, + f->lsm_rule)) { ++result; break; } @@ -664,7 +667,7 @@ static int audit_filter_rules(struct task_struct *tsk, break; if (security_audit_rule_match(ctx->ipc.osid, f->type, f->op, - f->lsm_rule, ctx)) + f->lsm_rule)) ++result; } break; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index eeaddfe0c0fb..5a8b9cded4f2 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -225,8 +225,7 @@ int aa_audit_rule_known(struct audit_krule *rule) return 0; } -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) { struct aa_audit_rule *rule = vrule; struct aa_label *label; diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index b8c8b1066b0a..ee559bc2acb8 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -192,7 +192,6 @@ static inline int complain_error(int error) void aa_audit_rule_free(void *vrule); int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); int aa_audit_rule_known(struct audit_krule *rule); -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx); +int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); #endif /* __AA_AUDIT_H */ diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index cc12f3449a72..026163f37ba1 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -307,8 +307,7 @@ static inline int security_filter_rule_init(u32 field, u32 op, char *rulestr, } static inline int security_filter_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule, - struct audit_context *actx) + void *lsmrule) { return -EINVAL; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 8bc8a1c8cb3f..26fa9d9723f6 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -340,8 +340,7 @@ retry: rc = security_filter_rule_match(osid, rule->lsm[i].type, Audit_equal, - rule->lsm[i].rule, - NULL); + rule->lsm[i].rule); break; case LSM_SUBJ_USER: case LSM_SUBJ_ROLE: @@ -349,8 +348,7 @@ retry: rc = security_filter_rule_match(secid, rule->lsm[i].type, Audit_equal, - rule->lsm[i].rule, - NULL); + rule->lsm[i].rule); default: break; } diff --git a/security/security.c b/security/security.c index f1b8d2587639..5f954b179a8e 100644 --- a/security/security.c +++ b/security/security.c @@ -1783,11 +1783,9 @@ void security_audit_rule_free(void *lsmrule) call_void_hook(audit_rule_free, lsmrule); } -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx) +int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) { - return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule, - actx); + return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule); } #endif /* CONFIG_AUDIT */ diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 1bdf973433cc..e51a81ffb8c9 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -46,13 +46,11 @@ void selinux_audit_rule_free(void *rule); * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against - * @actx: the audit context (can be NULL) associated with the check * * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule, - struct audit_context *actx); +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule); /** * selinux_audit_rule_known - check to see if rule contains selinux fields. diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index dd44126c8d14..0b7e33f6aa59 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3376,8 +3376,7 @@ int selinux_audit_rule_known(struct audit_krule *rule) return 0; } -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) { struct selinux_state *state = &selinux_state; struct context *ctxt; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 430d4f35e55c..403513df42fc 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4393,13 +4393,11 @@ static int smack_audit_rule_known(struct audit_krule *krule) * @field: audit rule flags given from user-space * @op: required testing operator * @vrule: smack internal rule presentation - * @actx: audit context associated with the check * * The core Audit hook. It's used to take the decision of * whether to audit or not to audit a given object. */ -static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule, - struct audit_context *actx) +static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule) { struct smack_known *skp; char *rule = vrule; -- cgit v1.2.3 From a0ce2f0aa6ad97c3d4927bf2ca54bcebdf062d55 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jan 2019 15:19:17 +0100 Subject: splice: don't merge into linked buffers Before this patch, it was possible for two pipes to affect each other after data had been transferred between them with tee(): ============ $ cat tee_test.c int main(void) { int pipe_a[2]; if (pipe(pipe_a)) err(1, "pipe"); int pipe_b[2]; if (pipe(pipe_b)) err(1, "pipe"); if (write(pipe_a[1], "abcd", 4) != 4) err(1, "write"); if (tee(pipe_a[0], pipe_b[1], 2, 0) != 2) err(1, "tee"); if (write(pipe_b[1], "xx", 2) != 2) err(1, "write"); char buf[5]; if (read(pipe_a[0], buf, 4) != 4) err(1, "read"); buf[4] = 0; printf("got back: '%s'\n", buf); } $ gcc -o tee_test tee_test.c $ ./tee_test got back: 'abxx' $ ============ As suggested by Al Viro, fix it by creating a separate type for non-mergeable pipe buffers, then changing the types of buffers in splice_pipe_to_pipe() and link_pipe(). Cc: Fixes: 7c77f0b3f920 ("splice: implement pipe to pipe splicing") Fixes: 70524490ee2e ("[PATCH] splice: add support for sys_tee()") Suggested-by: Al Viro Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/pipe.c | 14 ++++++++++++++ fs/splice.c | 4 ++++ include/linux/pipe_fs_i.h | 1 + 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index bdc5d3c0977d..c51750ed4011 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -234,6 +234,14 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { + .can_merge = 0, + .confirm = generic_pipe_buf_confirm, + .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + static const struct pipe_buf_operations packet_pipe_buf_ops = { .can_merge = 0, .confirm = generic_pipe_buf_confirm, @@ -242,6 +250,12 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf) +{ + if (buf->ops == &anon_pipe_buf_ops) + buf->ops = &anon_pipe_buf_nomerge_ops; +} + static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { diff --git a/fs/splice.c b/fs/splice.c index de2ede048473..90c29675d573 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1597,6 +1597,8 @@ retry: */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + obuf->len = len; opipe->nrbufs++; ibuf->offset += obuf->len; @@ -1671,6 +1673,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + pipe_buf_mark_unmergeable(obuf); + if (obuf->len > len) obuf->len = len; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5a3bb3b7c9ad..3ecd7ea212ae 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -182,6 +182,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); +void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; -- cgit v1.2.3 From 01e7187b41191376cee8bea8de9f907b001e87b4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 23 Jan 2019 15:19:18 +0100 Subject: pipe: stop using ->can_merge Al Viro pointed out that since there is only one pipe buffer type to which new data can be appended, it isn't necessary to have a ->can_merge field in struct pipe_buf_operations, we can just check for a magic type. Suggested-by: Al Viro Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/pipe.c | 20 ++++++++++++++++---- fs/splice.c | 4 ---- include/linux/pipe_fs_i.h | 7 ------- kernel/relay.c | 1 - kernel/trace/trace.c | 2 -- net/smc/smc_rx.c | 1 - 6 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index c51750ed4011..0ff09b490ddf 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -226,8 +226,8 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, } EXPORT_SYMBOL(generic_pipe_buf_release); +/* New data written to a pipe may be appended to a buffer with this type. */ static const struct pipe_buf_operations anon_pipe_buf_ops = { - .can_merge = 1, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, @@ -235,7 +235,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { }; static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, @@ -243,19 +242,32 @@ static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = { }; static const struct pipe_buf_operations packet_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, .get = generic_pipe_buf_get, }; +/** + * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable + * @buf: the buffer to mark + * + * Description: + * This function ensures that no future writes will be merged into the + * given &struct pipe_buffer. This is necessary when multiple pipe buffers + * share the same backing page. + */ void pipe_buf_mark_unmergeable(struct pipe_buffer *buf) { if (buf->ops == &anon_pipe_buf_ops) buf->ops = &anon_pipe_buf_nomerge_ops; } +static bool pipe_buf_can_merge(struct pipe_buffer *buf) +{ + return buf->ops == &anon_pipe_buf_ops; +} + static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { @@ -393,7 +405,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) struct pipe_buffer *buf = pipe->bufs + lastbuf; int offset = buf->offset + buf->len; - if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { + if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) { ret = pipe_buf_confirm(pipe, buf); if (ret) goto out; diff --git a/fs/splice.c b/fs/splice.c index 90c29675d573..fc71e9733f7a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -138,7 +138,6 @@ error: } const struct pipe_buf_operations page_cache_pipe_buf_ops = { - .can_merge = 0, .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, @@ -156,7 +155,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations user_page_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, @@ -326,7 +324,6 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, EXPORT_SYMBOL(generic_file_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -341,7 +338,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_nosteal, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3ecd7ea212ae..787d224ff43e 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -73,13 +73,6 @@ struct pipe_inode_info { * in fs/pipe.c for the pipe and generic variants of these hooks. */ struct pipe_buf_operations { - /* - * This is set to 1, if the generic pipe read/write may coalesce - * data into an existing buffer. If this is set to 0, a new pipe - * page segment is always used for new data. - */ - int can_merge; - /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..db3e419c25a6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1175,7 +1175,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations relay_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = relay_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c521b7347482..f380139e972c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5823,7 +5823,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, } static const struct pipe_buf_operations tracing_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -6843,7 +6842,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index bbcf0fe4ae10..413a6abf227e 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -136,7 +136,6 @@ static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations smc_pipe_ops = { - .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = smc_rx_pipe_buf_release, .steal = smc_rx_pipe_buf_nosteal, -- cgit v1.2.3 From 4bc59c2f7e306775f3d2e1bbafaa854dd1e09335 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:56 +0100 Subject: mfd / platform: cros_ec: Use devm_mfd_add_devices Use devm_mfd_add_devices() for adding cros-ec core MFD child devices. This reduces the need of remove callback from platform/chrome for removing the MFD child devices. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 14 +++----------- drivers/platform/chrome/cros_ec_i2c.c | 10 ---------- drivers/platform/chrome/cros_ec_lpc.c | 4 ---- drivers/platform/chrome/cros_ec_spi.c | 11 ----------- include/linux/mfd/cros_ec.h | 10 ---------- 5 files changed, 3 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index fe6f83766144..6acfe036d522 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -129,8 +129,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev) } } - err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, 1, - NULL, ec_dev->irq, NULL); + err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, + 1, NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, "Failed to register Embedded Controller subdevice %d\n", @@ -147,7 +147,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev) * - the EC is responsive at init time (it is not true for a * sensor hub. */ - err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, + err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_pd_cell, 1, NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, @@ -181,14 +181,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) } EXPORT_SYMBOL(cros_ec_register); -int cros_ec_remove(struct cros_ec_device *ec_dev) -{ - mfd_remove_devices(ec_dev->dev); - - return 0; -} -EXPORT_SYMBOL(cros_ec_remove); - #ifdef CONFIG_PM_SLEEP int cros_ec_suspend(struct cros_ec_device *ec_dev) { diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c index ef9b4763356f..9a009eaa4ada 100644 --- a/drivers/platform/chrome/cros_ec_i2c.c +++ b/drivers/platform/chrome/cros_ec_i2c.c @@ -317,15 +317,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client, return 0; } -static int cros_ec_i2c_remove(struct i2c_client *client) -{ - struct cros_ec_device *ec_dev = i2c_get_clientdata(client); - - cros_ec_remove(ec_dev); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int cros_ec_i2c_suspend(struct device *dev) { @@ -376,7 +367,6 @@ static struct i2c_driver cros_ec_driver = { .pm = &cros_ec_i2c_pm_ops, }, .probe = cros_ec_i2c_probe, - .remove = cros_ec_i2c_remove, .id_table = cros_ec_i2c_id, }; diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index e1b75775cd4a..14684a56e40f 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -327,7 +327,6 @@ static int cros_ec_lpc_probe(struct platform_device *pdev) static int cros_ec_lpc_remove(struct platform_device *pdev) { - struct cros_ec_device *ec_dev; struct acpi_device *adev; adev = ACPI_COMPANION(&pdev->dev); @@ -335,9 +334,6 @@ static int cros_ec_lpc_remove(struct platform_device *pdev) acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, cros_ec_lpc_acpi_notify); - ec_dev = platform_get_drvdata(pdev); - cros_ec_remove(ec_dev); - return 0; } diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 2060d1483043..6cfbc2835beb 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -685,16 +685,6 @@ static int cros_ec_spi_probe(struct spi_device *spi) return 0; } -static int cros_ec_spi_remove(struct spi_device *spi) -{ - struct cros_ec_device *ec_dev; - - ec_dev = spi_get_drvdata(spi); - cros_ec_remove(ec_dev); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int cros_ec_spi_suspend(struct device *dev) { @@ -733,7 +723,6 @@ static struct spi_driver cros_ec_driver_spi = { .pm = &cros_ec_spi_pm_ops, }, .probe = cros_ec_spi_probe, - .remove = cros_ec_spi_remove, .id_table = cros_ec_spi_id, }; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index de8b588c8776..977ebaa78e99 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -281,16 +281,6 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_remove() - Remove a ChromeOS EC. - * @ec_dev: Device to register. - * - * Call this to deregister a ChromeOS EC, then clean up any private data. - * - * Return: 0 on success or negative error code. - */ -int cros_ec_remove(struct cros_ec_device *ec_dev); - /** * cros_ec_register() - Register a new ChromeOS EC, using the provided info. * @ec_dev: Device to register. -- cgit v1.2.3 From ecf8a6cd949ef236ce435ae488ceb6b3354e677e Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:57 +0100 Subject: mfd / platform: cros_ec: Move lightbar attributes to its own driver The entire way how cros sysfs attibutes are created is broken. cros_ec_lightbar should be its own driver and its attributes should be associated with a lightbar driver not the mfd driver. In order to retain the path, the lightbar attributes are attached to the cros_class. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- .../sysfs-class-chromeos-driver-cros-ec-lightbar | 74 +++++++++++++++++ drivers/mfd/cros_ec_dev.c | 24 +++--- drivers/mfd/cros_ec_dev.h | 6 -- drivers/platform/chrome/Kconfig | 11 +++ drivers/platform/chrome/Makefile | 3 +- drivers/platform/chrome/cros_ec_lightbar.c | 95 +++++++++++++++++----- include/linux/mfd/cros_ec.h | 1 - 7 files changed, 173 insertions(+), 41 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar new file mode 100644 index 000000000000..57a037791403 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar @@ -0,0 +1,74 @@ +What: /sys/class/chromeos//lightbar/brightness +Date: August 2015 +KernelVersion: 4.2 +Description: + Writing to this file adjusts the overall brightness of + the lightbar, separate from any color intensity. The + valid range is 0 (off) to 255 (maximum brightness). + +What: /sys/class/chromeos//lightbar/interval_msec +Date: August 2015 +KernelVersion: 4.2 +Description: + The lightbar is controlled by an embedded controller (EC), + which also manages the keyboard, battery charging, fans, + and other system hardware. To prevent unprivileged users + from interfering with the other EC functions, the rate at + which the lightbar control files can be read or written is + limited. + + Reading this file will return the number of milliseconds + that must elapse between accessing any of the lightbar + functions through this interface. Going faster will simply + block until the necessary interval has lapsed. The interval + applies uniformly to all accesses of any kind by any user. + +What: /sys/class/chromeos//lightbar/led_rgb +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to control each LED segment. If the + lightbar is already running one of the automatic + sequences, you probably won’t see anything change because + your color setting will be almost immediately replaced. + To get useful results, you should stop the lightbar + sequence first. + + The values written to this file are sets of four integers, + indicating LED, RED, GREEN, BLUE. The LED number is 0 to 3 + to select a single segment, or 4 to set all four segments + to the same value at once. The RED, GREEN, and BLUE + numbers should be in the range 0 (off) to 255 (maximum). + You can update more than one segment at a time by writing + more than one set of four integers. + +What: /sys/class/chromeos//lightbar/program +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to upload and run custom lightbar sequences. + +What: /sys/class/chromeos//lightbar/sequence +Date: August 2015 +KernelVersion: 4.2 +Description: + The Pixel lightbar has a number of built-in sequences + that it displays under various conditions, such as at + power on, shut down, or while running. Reading from this + file displays the current sequence that the lightbar is + displaying. Writing to this file allows you to change the + sequence. + +What: /sys/class/chromeos//lightbar/userspace_control +Date: August 2015 +KernelVersion: 4.2 +Description: + This allows you to take the control of the lightbar. This + prevents the kernel from going through its normal + sequences. + +What: /sys/class/chromeos//lightbar/version +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the information about the lightbar version. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 2d0fee488c5a..b227718e0ec2 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -36,7 +36,6 @@ static int ec_major; static const struct attribute_group *cros_ec_groups[] = { &cros_ec_attr_group, - &cros_ec_lightbar_attr_group, &cros_ec_vbc_attr_group, NULL, }; @@ -395,6 +394,10 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { { .name = "cros-usbpd-charger" } }; +static const struct mfd_cell cros_ec_platform_cells[] = { + { .name = "cros-ec-lightbar" }, +}; + static int ec_device_probe(struct platform_device *pdev) { int retval = -ENOMEM; @@ -470,9 +473,6 @@ static int ec_device_probe(struct platform_device *pdev) retval); } - /* Take control of the lightbar from the EC. */ - lb_manual_suspend_ctrl(ec, 1); - /* We can now add the sysfs class, we know which parameter to show */ retval = cdev_device_add(&ec->cdev, &ec->class_dev); if (retval) { @@ -480,6 +480,15 @@ static int ec_device_probe(struct platform_device *pdev) goto failed; } + retval = mfd_add_devices(ec->dev, PLATFORM_DEVID_AUTO, + cros_ec_platform_cells, + ARRAY_SIZE(cros_ec_platform_cells), + NULL, 0, NULL); + if (retval) + dev_warn(ec->dev, + "failed to add cros-ec platform devices: %d\n", + retval); + if (cros_ec_debugfs_init(ec)) dev_warn(dev, "failed to create debugfs directory\n"); @@ -494,9 +503,6 @@ static int ec_device_remove(struct platform_device *pdev) { struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - /* Let the EC take over the lightbar again. */ - lb_manual_suspend_ctrl(ec, 0); - cros_ec_debugfs_remove(ec); mfd_remove_devices(ec->dev); @@ -525,8 +531,6 @@ static __maybe_unused int ec_device_suspend(struct device *dev) cros_ec_debugfs_suspend(ec); - lb_suspend(ec); - return 0; } @@ -536,8 +540,6 @@ static __maybe_unused int ec_device_resume(struct device *dev) cros_ec_debugfs_resume(ec); - lb_resume(ec); - return 0; } diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h index 978d836a0248..ec750433455a 100644 --- a/drivers/mfd/cros_ec_dev.h +++ b/drivers/mfd/cros_ec_dev.h @@ -44,10 +44,4 @@ struct cros_ec_readmem { #define CROS_EC_DEV_IOCXCMD _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command) #define CROS_EC_DEV_IOCRDMEM _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem) -/* Lightbar utilities */ -extern bool ec_has_lightbar(struct cros_ec_dev *ec); -extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable); -extern int lb_suspend(struct cros_ec_dev *ec); -extern int lb_resume(struct cros_ec_dev *ec); - #endif /* _CROS_EC_DEV_H_ */ diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 16b1615958aa..6c05752a3334 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -111,4 +111,15 @@ config CROS_KBD_LED_BACKLIGHT To compile this driver as a module, choose M here: the module will be called cros_kbd_led_backlight. +config CROS_EC_LIGHTBAR + tristate "Chromebook Pixel's lightbar support" + depends on MFD_CROS_EC_CHARDEV + default MFD_CROS_EC_CHARDEV + help + This option exposes the Chromebook Pixel's lightbar to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_lightbar. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index cd591bf872bb..3c29a4b405d5 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o cros_ec_lightbar.o \ +cros_ec_ctl-objs := cros_ec_sysfs.o \ cros_ec_vbc.o cros_ec_debugfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o @@ -13,3 +13,4 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC) += cros_ec_lpc_mec.o obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o +obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index 68193bb53383..80eed6317570 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -33,6 +33,8 @@ #include #include +#define DRV_NAME "cros-ec-lightbar" + /* Rate-limit the lightbar interface to prevent DoS. */ static unsigned long lb_interval_jiffies = 50 * HZ / 1000; @@ -373,7 +375,7 @@ error: return ret; } -int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable) +static int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable) { struct ec_params_lightbar *param; struct cros_ec_command *msg; @@ -408,25 +410,6 @@ error: return ret; } -EXPORT_SYMBOL(lb_manual_suspend_ctrl); - -int lb_suspend(struct cros_ec_dev *ec) -{ - if (userspace_control || ec != ec_with_lightbar) - return 0; - - return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND); -} -EXPORT_SYMBOL(lb_suspend); - -int lb_resume(struct cros_ec_dev *ec) -{ - if (userspace_control || ec != ec_with_lightbar) - return 0; - - return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME); -} -EXPORT_SYMBOL(lb_resume); static ssize_t sequence_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -584,7 +567,7 @@ static struct attribute *__lb_cmds_attrs[] = { NULL, }; -bool ec_has_lightbar(struct cros_ec_dev *ec) +static bool ec_has_lightbar(struct cros_ec_dev *ec) { return !!get_lightbar_version(ec, NULL, NULL); } @@ -616,4 +599,72 @@ struct attribute_group cros_ec_lightbar_attr_group = { .attrs = __lb_cmds_attrs, .is_visible = cros_ec_lightbar_attrs_are_visible, }; -EXPORT_SYMBOL(cros_ec_lightbar_attr_group); + +static int cros_ec_lightbar_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + /* Take control of the lightbar from the EC. */ + lb_manual_suspend_ctrl(ec_dev, 1); + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, + &cros_ec_lightbar_attr_group); + if (ret < 0) + dev_err(dev, "failed to create %s attributes. err=%d\n", + cros_ec_lightbar_attr_group.name, ret); + + return ret; +} + +static int cros_ec_lightbar_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, + &cros_ec_lightbar_attr_group); + + /* Let the EC take over the lightbar again. */ + lb_manual_suspend_ctrl(ec_dev, 0); + + return 0; +} + +static int __maybe_unused cros_ec_lightbar_resume(struct device *dev) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(dev); + + if (userspace_control || ec_dev != ec_with_lightbar) + return 0; + + return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_RESUME); +} + +static int __maybe_unused cros_ec_lightbar_suspend(struct device *dev) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(dev); + + if (userspace_control || ec_dev != ec_with_lightbar) + return 0; + + return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_SUSPEND); +} + +static SIMPLE_DEV_PM_OPS(cros_ec_lightbar_pm_ops, + cros_ec_lightbar_suspend, cros_ec_lightbar_resume); + +static struct platform_driver cros_ec_lightbar_driver = { + .driver = { + .name = DRV_NAME, + .pm = &cros_ec_lightbar_pm_ops, + }, + .probe = cros_ec_lightbar_probe, + .remove = cros_ec_lightbar_remove, +}; + +module_platform_driver(cros_ec_lightbar_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Expose the Chromebook Pixel's lightbar to userspace"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 977ebaa78e99..1e9b569564ea 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -extern struct attribute_group cros_ec_lightbar_attr_group; extern struct attribute_group cros_ec_vbc_attr_group; /* debugfs stuff */ -- cgit v1.2.3 From acb9900f9e8074858738f48bee9a705138961258 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:58 +0100 Subject: mfd / platform: cros_ec: Move vbc attributes to its own driver The entire way how cros sysfs attibutes are created is broken. cros_ec_vbc should be its own driver and its attributes should be associated with a vbc driver not the mfd driver. In order to retain the path, the vbc attributes are attached to the cros_class. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- .../sysfs-class-chromeos-driver-cros-ec-vbc | 6 +++ drivers/mfd/cros_ec_dev.c | 2 +- drivers/platform/chrome/Kconfig | 11 ++++++ drivers/platform/chrome/Makefile | 3 +- drivers/platform/chrome/cros_ec_vbc.c | 43 +++++++++++++++++++++- include/linux/mfd/cros_ec.h | 1 - 6 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc new file mode 100644 index 000000000000..38c5aaaaa89a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc @@ -0,0 +1,6 @@ +What: /sys/class/chromeos//vbc/vboot_context +Date: October 2015 +KernelVersion: 4.4 +Description: + Read/write the verified boot context data included on a + small nvram space on some EC implementations. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index b227718e0ec2..40c98808fa1c 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -36,7 +36,6 @@ static int ec_major; static const struct attribute_group *cros_ec_groups[] = { &cros_ec_attr_group, - &cros_ec_vbc_attr_group, NULL, }; @@ -396,6 +395,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { static const struct mfd_cell cros_ec_platform_cells[] = { { .name = "cros-ec-lightbar" }, + { .name = "cros-ec-vbc" }, }; static int ec_device_probe(struct platform_device *pdev) diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 6c05752a3334..1e9dc9626e84 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -122,4 +122,15 @@ config CROS_EC_LIGHTBAR To compile this driver as a module, choose M here: the module will be called cros_ec_lightbar. +config CROS_EC_VBC + tristate "ChromeOS EC vboot context support" + depends on MFD_CROS_EC_CHARDEV && OF + default MFD_CROS_EC_CHARDEV + help + This option exposes the ChromeOS EC vboot context nvram to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_vbc. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 3c29a4b405d5..4081b7179df7 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o cros_ec_ctl-objs := cros_ec_sysfs.o \ - cros_ec_vbc.o cros_ec_debugfs.o + cros_ec_debugfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o @@ -14,3 +14,4 @@ obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o +obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c index 5356f26bc022..da3bbf05e86f 100644 --- a/drivers/platform/chrome/cros_ec_vbc.c +++ b/drivers/platform/chrome/cros_ec_vbc.c @@ -22,8 +22,11 @@ #include #include #include +#include #include +#define DRV_NAME "cros-ec-vbc" + static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj, struct bin_attribute *att, char *buf, loff_t pos, size_t count) @@ -132,4 +135,42 @@ struct attribute_group cros_ec_vbc_attr_group = { .bin_attrs = cros_ec_vbc_bin_attrs, .is_bin_visible = cros_ec_vbc_is_visible, }; -EXPORT_SYMBOL(cros_ec_vbc_attr_group); + +static int cros_ec_vbc_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, + &cros_ec_vbc_attr_group); + if (ret < 0) + dev_err(dev, "failed to create %s attributes. err=%d\n", + cros_ec_vbc_attr_group.name, ret); + + return ret; +} + +static int cros_ec_vbc_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, + &cros_ec_vbc_attr_group); + + return 0; +} + +static struct platform_driver cros_ec_vbc_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cros_ec_vbc_probe, + .remove = cros_ec_vbc_remove, +}; + +module_platform_driver(cros_ec_vbc_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Expose the vboot context nvram to userspace"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 1e9b569564ea..fdc3152cca1d 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -extern struct attribute_group cros_ec_vbc_attr_group; /* debugfs stuff */ int cros_ec_debugfs_init(struct cros_ec_dev *ec); -- cgit v1.2.3 From 6fce0a2cf5a050e8a3326556d7d293e69be303be Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:33:59 +0100 Subject: mfd / platform: cros_ec: Move debugfs attributes to its own driver The entire way how cros debugfs attibutes are created is broken. cros_ec_debugfs should be its own driver and its attributes should be associated with a debugfs driver not the mfd driver. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_dev.c | 41 +------------------- drivers/platform/chrome/Kconfig | 11 ++++++ drivers/platform/chrome/Makefile | 4 +- drivers/platform/chrome/cros_ec_debugfs.c | 62 +++++++++++++++++++++---------- include/linux/mfd/cros_ec.h | 6 --- 5 files changed, 56 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 40c98808fa1c..9955937b821d 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -394,6 +394,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { }; static const struct mfd_cell cros_ec_platform_cells[] = { + { .name = "cros-ec-debugfs" }, { .name = "cros-ec-lightbar" }, { .name = "cros-ec-vbc" }, }; @@ -489,9 +490,6 @@ static int ec_device_probe(struct platform_device *pdev) "failed to add cros-ec platform devices: %d\n", retval); - if (cros_ec_debugfs_init(ec)) - dev_warn(dev, "failed to create debugfs directory\n"); - return 0; failed: @@ -503,62 +501,25 @@ static int ec_device_remove(struct platform_device *pdev) { struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - cros_ec_debugfs_remove(ec); - mfd_remove_devices(ec->dev); cdev_del(&ec->cdev); device_unregister(&ec->class_dev); return 0; } -static void ec_device_shutdown(struct platform_device *pdev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - - /* Be sure to clear up debugfs delayed works */ - cros_ec_debugfs_remove(ec); -} - static const struct platform_device_id cros_ec_id[] = { { DRV_NAME, 0 }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(platform, cros_ec_id); -static __maybe_unused int ec_device_suspend(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - cros_ec_debugfs_suspend(ec); - - return 0; -} - -static __maybe_unused int ec_device_resume(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - cros_ec_debugfs_resume(ec); - - return 0; -} - -static const struct dev_pm_ops cros_ec_dev_pm_ops = { -#ifdef CONFIG_PM_SLEEP - .suspend = ec_device_suspend, - .resume = ec_device_resume, -#endif -}; - static struct platform_driver cros_ec_dev_driver = { .driver = { .name = DRV_NAME, - .pm = &cros_ec_dev_pm_ops, }, .id_table = cros_ec_id, .probe = ec_device_probe, .remove = ec_device_remove, - .shutdown = ec_device_shutdown, }; static int __init cros_ec_dev_init(void) diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 1e9dc9626e84..6cbf5b69d156 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -133,4 +133,15 @@ config CROS_EC_VBC To compile this driver as a module, choose M here: the module will be called cros_ec_vbc. +config CROS_EC_DEBUGFS + tristate "Export ChromeOS EC internals in DebugFS" + depends on MFD_CROS_EC_CHARDEV && DEBUG_FS + default MFD_CROS_EC_CHARDEV + help + This option exposes the ChromeOS EC device internals to + userspace. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_debugfs. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 4081b7179df7..12a5c4d18c17 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,8 +3,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o \ - cros_ec_debugfs.o +cros_ec_ctl-objs := cros_ec_sysfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o @@ -15,3 +14,4 @@ obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o +obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index c62ee8e610a0..6cacac53dfce 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -23,12 +23,16 @@ #include #include #include +#include #include +#include #include #include #include #include +#define DRV_NAME "cros-ec-debugfs" + #define LOG_SHIFT 14 #define LOG_SIZE (1 << LOG_SHIFT) #define LOG_POLL_SEC 10 @@ -423,8 +427,9 @@ static int cros_ec_create_pdinfo(struct cros_ec_debugfs *debug_info) return 0; } -int cros_ec_debugfs_init(struct cros_ec_dev *ec) +static int cros_ec_debugfs_probe(struct platform_device *pd) { + struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent); struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev); const char *name = ec_platform->ec_name; struct cros_ec_debugfs *debug_info; @@ -453,40 +458,57 @@ int cros_ec_debugfs_init(struct cros_ec_dev *ec) ec->debug_info = debug_info; + dev_set_drvdata(&pd->dev, ec); + return 0; remove_debugfs: debugfs_remove_recursive(debug_info->dir); return ret; } -EXPORT_SYMBOL(cros_ec_debugfs_init); -void cros_ec_debugfs_remove(struct cros_ec_dev *ec) +static int cros_ec_debugfs_remove(struct platform_device *pd) { - if (!ec->debug_info) - return; + struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent); debugfs_remove_recursive(ec->debug_info->dir); cros_ec_cleanup_console_log(ec->debug_info); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_remove); -void cros_ec_debugfs_suspend(struct cros_ec_dev *ec) +static int __maybe_unused cros_ec_debugfs_suspend(struct device *dev) { - /* - * cros_ec_debugfs_init() failures are non-fatal; it's also possible - * that we initted things but decided that console log wasn't supported. - * We'll use the same set of checks that cros_ec_debugfs_remove() + - * cros_ec_cleanup_console_log() end up using to handle those cases. - */ - if (ec->debug_info && ec->debug_info->log_buffer.buf) - cancel_delayed_work_sync(&ec->debug_info->log_poll_work); + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + cancel_delayed_work_sync(&ec->debug_info->log_poll_work); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_suspend); -void cros_ec_debugfs_resume(struct cros_ec_dev *ec) +static int __maybe_unused cros_ec_debugfs_resume(struct device *dev) { - if (ec->debug_info && ec->debug_info->log_buffer.buf) - schedule_delayed_work(&ec->debug_info->log_poll_work, 0); + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + schedule_delayed_work(&ec->debug_info->log_poll_work, 0); + + return 0; } -EXPORT_SYMBOL(cros_ec_debugfs_resume); + +static SIMPLE_DEV_PM_OPS(cros_ec_debugfs_pm_ops, + cros_ec_debugfs_suspend, cros_ec_debugfs_resume); + +static struct platform_driver cros_ec_debugfs_driver = { + .driver = { + .name = DRV_NAME, + .pm = &cros_ec_debugfs_pm_ops, + }, + .probe = cros_ec_debugfs_probe, + .remove = cros_ec_debugfs_remove, +}; + +module_platform_driver(cros_ec_debugfs_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Debug logs for ChromeOS EC"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index fdc3152cca1d..e50860d190db 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -328,10 +328,4 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; -/* debugfs stuff */ -int cros_ec_debugfs_init(struct cros_ec_dev *ec); -void cros_ec_debugfs_remove(struct cros_ec_dev *ec); -void cros_ec_debugfs_suspend(struct cros_ec_dev *ec); -void cros_ec_debugfs_resume(struct cros_ec_dev *ec); - #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From 6fd7f2bbd4422e7635bc771cd1ec440378158cb1 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 12 Dec 2018 18:34:00 +0100 Subject: mfd / platform: cros_ec: Move device sysfs attributes to its own driver The entire way how cros debugfs attibutes are created is broken. cros_ec_sysfs should be its own driver and its attributes should be associated with the sysfs driver not the mfd driver. The patch also adds the sysfs documentation. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- Documentation/ABI/testing/sysfs-class-chromeos | 32 +++++++++++++++++++++++ drivers/mfd/Kconfig | 1 - drivers/mfd/cros_ec_dev.c | 7 +---- drivers/platform/chrome/Kconfig | 14 +++++++--- drivers/platform/chrome/Makefile | 3 +-- drivers/platform/chrome/cros_ec_lightbar.c | 2 +- drivers/platform/chrome/cros_ec_sysfs.c | 36 +++++++++++++++++++++++++- include/linux/mfd/cros_ec.h | 3 --- 8 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos new file mode 100644 index 000000000000..5819699d66ec --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-chromeos @@ -0,0 +1,32 @@ +What: /sys/class/chromeos//flashinfo +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the EC flash information. + +What: /sys/class/chromeos//kb_wake_angle +Date: March 2018 +KernelVersion: 4.17 +Description: + Control the keyboard wake lid angle. Values are between + 0 and 360. This file will also show the keyboard wake lid + angle by querying the hardware. + +What: /sys/class/chromeos//reboot +Date: August 2015 +KernelVersion: 4.2 +Description: + Tell the EC to reboot in various ways. Options are: + "cancel": Cancel a pending reboot. + "ro": Jump to RO without rebooting. + "rw": Jump to RW without rebooting. + "cold": Cold reboot. + "disable-jump": Disable jump until next reboot. + "hibernate": Hibernate the EC. + "at-shutdown": Reboot after an AP shutdown. + +What: /sys/class/chromeos//version +Date: August 2015 +KernelVersion: 4.2 +Description: + Show the information about the EC software and hardware. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f461460a2aeb..2acc105e43cc 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -215,7 +215,6 @@ config MFD_CROS_EC config MFD_CROS_EC_CHARDEV tristate "Chrome OS Embedded Controller userspace device interface" depends on MFD_CROS_EC - select CROS_EC_CTL ---help--- This driver adds support to talk with the ChromeOS EC from userspace. diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 9955937b821d..b9ec2a798dbb 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -34,15 +34,9 @@ #define CROS_MAX_DEV 128 static int ec_major; -static const struct attribute_group *cros_ec_groups[] = { - &cros_ec_attr_group, - NULL, -}; - static struct class cros_class = { .owner = THIS_MODULE, .name = "chromeos", - .dev_groups = cros_ec_groups, }; /* Basic communication */ @@ -396,6 +390,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = { static const struct mfd_cell cros_ec_platform_cells[] = { { .name = "cros-ec-debugfs" }, { .name = "cros-ec-lightbar" }, + { .name = "cros-ec-sysfs" }, { .name = "cros-ec-vbc" }, }; diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 6cbf5b69d156..5e2fde5ff63d 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -49,9 +49,6 @@ config CHROMEOS_TBMC To compile this driver as a module, choose M here: the module will be called chromeos_tbmc. -config CROS_EC_CTL - tristate - config CROS_EC_I2C tristate "ChromeOS Embedded Controller (I2C)" depends on MFD_CROS_EC && I2C @@ -144,4 +141,15 @@ config CROS_EC_DEBUGFS To compile this driver as a module, choose M here: the module will be called cros_ec_debugfs. +config CROS_EC_SYSFS + tristate "ChromeOS EC control and information through sysfs" + depends on MFD_CROS_EC_CHARDEV && SYSFS + default MFD_CROS_EC_CHARDEV + help + This option exposes some sysfs attributes to control and get + information from ChromeOS EC. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_sysfs. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 12a5c4d18c17..fdbee501931b 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -3,8 +3,6 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o -cros_ec_ctl-objs := cros_ec_sysfs.o -obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o obj-$(CONFIG_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_CROS_EC_SPI) += cros_ec_spi.o cros_ec_lpcs-objs := cros_ec_lpc.o cros_ec_lpc_reg.o @@ -15,3 +13,4 @@ obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o +obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index 80eed6317570..c22318ba93aa 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -594,7 +594,7 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj, return 0; } -struct attribute_group cros_ec_lightbar_attr_group = { +static struct attribute_group cros_ec_lightbar_attr_group = { .name = "lightbar", .attrs = __lb_cmds_attrs, .is_visible = cros_ec_lightbar_attrs_are_visible, diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c index f34a50121064..0ff5aa30c070 100644 --- a/drivers/platform/chrome/cros_ec_sysfs.c +++ b/drivers/platform/chrome/cros_ec_sysfs.c @@ -34,6 +34,8 @@ #include #include +#define DRV_NAME "cros-ec-sysfs" + /* Accessor functions */ static ssize_t reboot_show(struct device *dev, @@ -353,7 +355,39 @@ struct attribute_group cros_ec_attr_group = { .attrs = __ec_attrs, .is_visible = cros_ec_ctrl_visible, }; -EXPORT_SYMBOL(cros_ec_attr_group); + +static int cros_ec_sysfs_probe(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + struct device *dev = &pd->dev; + int ret; + + ret = sysfs_create_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group); + if (ret < 0) + dev_err(dev, "failed to create attributes. err=%d\n", ret); + + return ret; +} + +static int cros_ec_sysfs_remove(struct platform_device *pd) +{ + struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent); + + sysfs_remove_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group); + + return 0; +} + +static struct platform_driver cros_ec_sysfs_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cros_ec_sysfs_probe, + .remove = cros_ec_sysfs_remove, +}; + +module_platform_driver(cros_ec_sysfs_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ChromeOS EC control driver"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index e50860d190db..8f2a8918bfa3 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -325,7 +325,4 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); */ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); -/* sysfs stuff */ -extern struct attribute_group cros_ec_attr_group; - #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From efb5a790dfc33b36bc64dd5a41ffc3ae5a709770 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 13 Jan 2019 13:36:46 -0500 Subject: mfd: wm831x-core: Drop unused module infrastructure from non-modular code The Kconfig currently controlling compilation of this code is: drivers/mfd/Kconfig:config MFD_WM831X drivers/mfd/Kconfig: bool ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information is already contained at the top of the file in the comments. Previous demodularizaion work has made wm831x_device_exit() no longer used, so it is also removed from the 831x core code. Signed-off-by: Paul Gortmaker Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm831x-core.c | 15 ++------------- include/linux/mfd/wm831x/core.h | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index e70d35ef5c6d..25fbbaf39cb9 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -13,7 +13,8 @@ */ #include -#include +#include +#include #include #include #include @@ -1892,14 +1893,6 @@ err: return ret; } -void wm831x_device_exit(struct wm831x *wm831x) -{ - wm831x_otp_exit(wm831x); - mfd_remove_devices(wm831x->dev); - free_irq(wm831x_irq(wm831x, WM831X_IRQ_AUXADC_DATA), wm831x); - wm831x_irq_exit(wm831x); -} - int wm831x_device_suspend(struct wm831x *wm831x) { int reg, mask; @@ -1944,7 +1937,3 @@ void wm831x_device_shutdown(struct wm831x *wm831x) } } EXPORT_SYMBOL_GPL(wm831x_device_shutdown); - -MODULE_DESCRIPTION("Core support for the WM831X AudioPlus PMIC"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mark Brown"); diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index b49fa67612f1..6fcb8eb00282 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -418,7 +418,6 @@ int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); int wm831x_device_init(struct wm831x *wm831x, int irq); -void wm831x_device_exit(struct wm831x *wm831x); int wm831x_device_suspend(struct wm831x *wm831x); void wm831x_device_shutdown(struct wm831x *wm831x); int wm831x_irq_init(struct wm831x *wm831x, int irq); -- cgit v1.2.3 From 0db88688e1bb0180d6348742bdba8927cd0e5670 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 13 Jan 2019 13:36:48 -0500 Subject: mfd: wm8350-core: Drop unused module infrastructure from non-modular code The Kconfig currently controlling compilation of this code is: drivers/mfd/Kconfig:config MFD_WM8350 drivers/mfd/Kconfig: bool ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information is already contained at the top of the file in the comments. We replace module.h with init.h and export.h ; the latter since the file does export some symbols. Previous demodularizaion work has made wm8350_device_exit() no longer used, so it is also removed from the 8350 core code. Signed-off-by: Paul Gortmaker Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm8350-core.c | 30 ++---------------------------- include/linux/mfd/wm8350/core.h | 1 - 2 files changed, 2 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index 8a07c5634aee..9e1070f26b11 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -13,7 +13,8 @@ */ #include -#include +#include +#include #include #include #include @@ -442,30 +443,3 @@ err: return ret; } EXPORT_SYMBOL_GPL(wm8350_device_init); - -void wm8350_device_exit(struct wm8350 *wm8350) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(wm8350->pmic.led); i++) - platform_device_unregister(wm8350->pmic.led[i].pdev); - - for (i = 0; i < ARRAY_SIZE(wm8350->pmic.pdev); i++) - platform_device_unregister(wm8350->pmic.pdev[i]); - - platform_device_unregister(wm8350->wdt.pdev); - platform_device_unregister(wm8350->rtc.pdev); - platform_device_unregister(wm8350->power.pdev); - platform_device_unregister(wm8350->hwmon.pdev); - platform_device_unregister(wm8350->gpio.pdev); - platform_device_unregister(wm8350->codec.pdev); - - if (wm8350->irq_base) - free_irq(wm8350->irq_base + WM8350_IRQ_AUXADC_DATARDY, wm8350); - - wm8350_irq_exit(wm8350); -} -EXPORT_SYMBOL_GPL(wm8350_device_exit); - -MODULE_DESCRIPTION("WM8350 AudioPlus PMIC core driver"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 509481d9cf19..202d9bde2c7c 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -643,7 +643,6 @@ struct wm8350_platform_data { */ int wm8350_device_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata); -void wm8350_device_exit(struct wm8350 *wm8350); /* * WM8350 device IO -- cgit v1.2.3 From d57f72875eed3f26afaca176c0f425f209bc99d7 Mon Sep 17 00:00:00 2001 From: Christian Hohnstaedt Date: Mon, 14 Jan 2019 09:16:34 +0100 Subject: mfd: tps65218.c: Add input voltage options These options apply to all regulators in this chip. ti,strict-supply-voltage-supervision: Set STRICT flag in CONFIG1 ti,under-voltage-limit-microvolt: Select 2.75, 2.95, 3.25 or 3.35 V UVLO in CONFIG1 ti,under-voltage-hyst-microvolt: Select 200mV or 400mV UVLOHYS in CONFIG2 Signed-off-by: Christian Hohnstaedt Tested-by: Keerthy Reviewed-by: Keerthy Signed-off-by: Lee Jones --- drivers/mfd/tps65218.c | 89 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65218.h | 4 ++ 2 files changed, 93 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index 8bcdecf494d0..a62ea4cb8be7 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -211,6 +211,83 @@ static const struct of_device_id of_tps65218_match_table[] = { }; MODULE_DEVICE_TABLE(of, of_tps65218_match_table); +static int tps65218_voltage_set_strict(struct tps65218 *tps) +{ + u32 strict; + + if (of_property_read_u32(tps->dev->of_node, + "ti,strict-supply-voltage-supervision", + &strict)) + return 0; + + if (strict != 0 && strict != 1) { + dev_err(tps->dev, + "Invalid ti,strict-supply-voltage-supervision value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG1, + TPS65218_CONFIG1_STRICT, + strict ? TPS65218_CONFIG1_STRICT : 0, + TPS65218_PROTECT_L1); + return 0; +} + +static int tps65218_voltage_set_uv_hyst(struct tps65218 *tps) +{ + u32 hyst; + + if (of_property_read_u32(tps->dev->of_node, + "ti,under-voltage-hyst-microvolt", &hyst)) + return 0; + + if (hyst != 400000 && hyst != 200000) { + dev_err(tps->dev, + "Invalid ti,under-voltage-hyst-microvolt value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG2, + TPS65218_CONFIG2_UVLOHYS, + hyst == 400000 ? TPS65218_CONFIG2_UVLOHYS : 0, + TPS65218_PROTECT_L1); + return 0; +} + +static int tps65218_voltage_set_uvlo(struct tps65218 *tps) +{ + u32 uvlo; + int uvloval; + + if (of_property_read_u32(tps->dev->of_node, + "ti,under-voltage-limit-microvolt", &uvlo)) + return 0; + + switch (uvlo) { + case 2750000: + uvloval = TPS65218_CONFIG1_UVLO_2750000; + break; + case 2950000: + uvloval = TPS65218_CONFIG1_UVLO_2950000; + break; + case 3250000: + uvloval = TPS65218_CONFIG1_UVLO_3250000; + break; + case 3350000: + uvloval = TPS65218_CONFIG1_UVLO_3350000; + break; + default: + dev_err(tps->dev, + "Invalid ti,under-voltage-limit-microvolt value\n"); + return -EINVAL; + } + + tps65218_update_bits(tps, TPS65218_REG_CONFIG1, + TPS65218_CONFIG1_UVLO_MASK, uvloval, + TPS65218_PROTECT_L1); + return 0; +} + static int tps65218_probe(struct i2c_client *client, const struct i2c_device_id *ids) { @@ -249,6 +326,18 @@ static int tps65218_probe(struct i2c_client *client, tps->rev = chipid & TPS65218_CHIPID_REV_MASK; + ret = tps65218_voltage_set_strict(tps); + if (ret) + return ret; + + ret = tps65218_voltage_set_uvlo(tps); + if (ret) + return ret; + + ret = tps65218_voltage_set_uv_hyst(tps); + if (ret) + return ret; + ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65218_cells, ARRAY_SIZE(tps65218_cells), NULL, 0, regmap_irq_get_domain(tps->irq_data)); diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index c204d9a79436..3cbe103495ab 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -137,6 +137,10 @@ #define TPS65218_CONFIG1_PGDLY_MASK 0x18 #define TPS65218_CONFIG1_STRICT BIT(2) #define TPS65218_CONFIG1_UVLO_MASK 0x3 +#define TPS65218_CONFIG1_UVLO_2750000 0x0 +#define TPS65218_CONFIG1_UVLO_2950000 0x1 +#define TPS65218_CONFIG1_UVLO_3250000 0x2 +#define TPS65218_CONFIG1_UVLO_3350000 0x3 #define TPS65218_CONFIG2_DC12_RST BIT(7) #define TPS65218_CONFIG2_UVLOHYS BIT(6) -- cgit v1.2.3 From cfced786969c2a3e1bca45d7055a00311d93ae6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jan 2019 18:20:05 +0100 Subject: dma-mapping: remove the default map_resource implementation Instead provide a proper implementation in the direct mapping code, and also wire it up for arm and powerpc, leaving an error return for all the IOMMU or virtual mapping instances for which we'd have to wire up an actual implementation Signed-off-by: Christoph Hellwig Tested-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 2 ++ arch/powerpc/kernel/dma-swiotlb.c | 1 + arch/powerpc/kernel/dma.c | 1 + include/linux/dma-mapping.h | 12 +++++++----- kernel/dma/direct.c | 14 ++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1e2922e447c..3c8534904209 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -188,6 +188,7 @@ const struct dma_map_ops arm_dma_ops = { .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, .unmap_sg = arm_dma_unmap_sg, + .map_resource = dma_direct_map_resource, .sync_single_for_cpu = arm_dma_sync_single_for_cpu, .sync_single_for_device = arm_dma_sync_single_for_device, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, @@ -211,6 +212,7 @@ const struct dma_map_ops arm_coherent_dma_ops = { .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, + .map_resource = dma_direct_map_resource, .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_coherent_dma_ops); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 7d5fc9751622..fbb2506a414e 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -55,6 +55,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = dma_direct_map_page, .unmap_page = dma_direct_unmap_page, + .map_resource = dma_direct_map_resource, .sync_single_for_cpu = dma_direct_sync_single_for_cpu, .sync_single_for_device = dma_direct_sync_single_for_device, .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1903ebb2e9c..258b9e8ebb99 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -273,6 +273,7 @@ const struct dma_map_ops dma_nommu_ops = { .dma_supported = dma_nommu_dma_supported, .map_page = dma_nommu_map_page, .unmap_page = dma_nommu_unmap_page, + .map_resource = dma_direct_map_resource, .get_required_mask = dma_nommu_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_nommu_sync_single, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f6ded992c183..9842085e6774 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -208,6 +208,8 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long attrs); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir, unsigned long attrs); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) @@ -346,19 +348,19 @@ static inline dma_addr_t dma_map_resource(struct device *dev, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; + dma_addr_t addr = DMA_MAPPING_ERROR; BUG_ON(!valid_dma_direction(dir)); /* Don't allow RAM to be mapped */ BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); - addr = phys_addr; - if (ops && ops->map_resource) + if (dma_is_direct(ops)) + addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); debug_dma_map_resource(dev, phys_addr, size, dir, addr); - return addr; } @@ -369,7 +371,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops && ops->unmap_resource) + if (!dma_is_direct(ops) && ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 355d16acee6d..25bd19974223 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -356,6 +356,20 @@ out_unmap: } EXPORT_SYMBOL(dma_direct_map_sg); +dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t dma_addr = paddr; + + if (unlikely(!dma_direct_possible(dev, dma_addr, size))) { + report_addr(dev, dma_addr, size); + return DMA_MAPPING_ERROR; + } + + return dma_addr; +} +EXPORT_SYMBOL(dma_direct_map_resource); + /* * Because 32-bit DMA masks are so common we expect every architecture to be * able to satisfy them - either by not supporting more physical memory, or by -- cgit v1.2.3 From 645386dfe6307dbb28f10a4513792a59beda0efa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jan 2019 17:17:53 +0100 Subject: dma-mapping: don't BUG when calling dma_map_resource on RAM Use WARN_ON_ONCE to print a stack trace and return a proper error code instead. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Tested-by: Marek Szyprowski --- include/linux/dma-mapping.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9842085e6774..b904d55247ab 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -353,7 +353,8 @@ static inline dma_addr_t dma_map_resource(struct device *dev, BUG_ON(!valid_dma_direction(dir)); /* Don't allow RAM to be mapped */ - BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); + if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) + return DMA_MAPPING_ERROR; if (dma_is_direct(ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); -- cgit v1.2.3 From e2f3cd831a280fc226118d9369bf3f77aab58c56 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Feb 2019 01:49:14 +0100 Subject: driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 45 +++++++++++++++++++++++++++++--------------- drivers/base/power/runtime.c | 26 +++++++++++-------------- include/linux/device.h | 2 +- 3 files changed, 42 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 50610cd87e71..8611385e44b5 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -165,6 +165,19 @@ void device_pm_move_to_tail(struct device *dev) device_links_read_unlock(idx); } +static void device_link_rpm_prepare(struct device *consumer, + struct device *supplier) +{ + pm_runtime_new_link(consumer); + /* + * If the link is being added by the consumer driver at probe time, + * balance the decrementation of the supplier's runtime PM usage counter + * after consumer probe in driver_probe_device(). + */ + if (consumer->links.status == DL_DEV_PROBING) + pm_runtime_get_noresume(supplier); +} + /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. @@ -201,7 +214,6 @@ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; - bool rpm_put_supplier = false; if (!consumer || !supplier || (flags & DL_FLAG_STATELESS && @@ -213,7 +225,6 @@ struct device_link *device_link_add(struct device *consumer, pm_runtime_put_noidle(supplier); return NULL; } - rpm_put_supplier = true; } device_links_write_lock(); @@ -249,6 +260,15 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; + if (flags & DL_FLAG_PM_RUNTIME) { + if (!(link->flags & DL_FLAG_PM_RUNTIME)) { + device_link_rpm_prepare(consumer, supplier); + link->flags |= DL_FLAG_PM_RUNTIME; + } + if (flags & DL_FLAG_RPM_ACTIVE) + refcount_inc(&link->rpm_active); + } + kref_get(&link->kref); goto out; } @@ -257,20 +277,15 @@ struct device_link *device_link_add(struct device *consumer, if (!link) goto out; + refcount_set(&link->rpm_active, 1); + if (flags & DL_FLAG_PM_RUNTIME) { - if (flags & DL_FLAG_RPM_ACTIVE) { - link->rpm_active = true; - rpm_put_supplier = false; - } - pm_runtime_new_link(consumer); - /* - * If the link is being added by the consumer driver at probe - * time, balance the decrementation of the supplier's runtime PM - * usage counter after consumer probe in driver_probe_device(). - */ - if (consumer->links.status == DL_DEV_PROBING) - pm_runtime_get_noresume(supplier); + if (flags & DL_FLAG_RPM_ACTIVE) + refcount_inc(&link->rpm_active); + + device_link_rpm_prepare(consumer, supplier); } + get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); @@ -333,7 +348,7 @@ struct device_link *device_link_add(struct device *consumer, device_pm_unlock(); device_links_write_unlock(); - if (rpm_put_supplier) + if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..8bc9a432de70 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -259,11 +259,8 @@ static int rpm_get_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME)) - continue; - - if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND || - link->rpm_active) + if (!(link->flags & DL_FLAG_PM_RUNTIME) || + READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) continue; retval = pm_runtime_get_sync(link->supplier); @@ -272,7 +269,7 @@ static int rpm_get_suppliers(struct device *dev) pm_runtime_put_noidle(link->supplier); return retval; } - link->rpm_active = true; + refcount_inc(&link->rpm_active); } return 0; } @@ -281,12 +278,13 @@ static void rpm_put_suppliers(struct device *dev) { struct device_link *link; - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) - if (link->rpm_active && - READ_ONCE(link->status) != DL_STATE_SUPPLIER_UNBIND) { + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { + if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) + continue; + + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); - link->rpm_active = false; - } + } } /** @@ -1539,7 +1537,7 @@ void pm_runtime_remove(struct device *dev) * * Check links from this device to any consumers and if any of them have active * runtime PM references to the device, drop the usage counter of the device - * (once per link). + * (as many times as needed). * * Links with the DL_FLAG_STATELESS flag set are ignored. * @@ -1561,10 +1559,8 @@ void pm_runtime_clean_up_links(struct device *dev) if (link->flags & DL_FLAG_STATELESS) continue; - if (link->rpm_active) { + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put_noidle(dev); - link->rpm_active = false; - } } device_links_read_unlock(idx); diff --git a/include/linux/device.h b/include/linux/device.h index d0e452fd0bff..5f49d2eff6ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -853,7 +853,7 @@ struct device_link { struct list_head c_node; enum device_link_state status; u32 flags; - bool rpm_active; + refcount_t rpm_active; struct kref kref; #ifdef CONFIG_SRCU struct rcu_head rcu_head; -- cgit v1.2.3 From e7dd40105aac9ba051e44ad711123bc53a5e4c71 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Feb 2019 01:59:42 +0100 Subject: driver core: Add device link flag DL_FLAG_AUTOPROBE_CONSUMER Add a new device link flag, DL_FLAG_AUTOPROBE_CONSUMER, to request the driver core to probe for a consumer driver automatically after binding a driver to the supplier device on a persistent managed device link. As unbinding the supplier driver on a managed device link causes the consumer driver to be detached from its device automatically, this flag provides a complementary mechanism which is needed to address some "composite device" use cases. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/device_link.rst | 9 +++++++++ drivers/base/core.c | 16 +++++++++++++++- drivers/base/dd.c | 2 +- include/linux/device.h | 3 +++ 4 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index e249e074a8d2..c764755121c7 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -94,6 +94,15 @@ Similarly, when the device link is added from supplier's ``->probe`` callback, ``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically purged when the supplier fails to probe or later unbinds. +If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER`` +is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core +to probe for a driver for the consumer driver on the link automatically after +a driver has been bound to the supplier device. + +Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with +``DL_FLAG_STATELESS`` are invalid and cannot be used. + Limitations =========== diff --git a/drivers/base/core.c b/drivers/base/core.c index 9d49b461b1d9..abfce4f613f8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -208,6 +208,12 @@ static void device_link_rpm_prepare(struct device *consumer, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * + * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and + * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent + * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can + * be used to request the driver core to automaticall probe for a consmer + * driver after successfully binding a driver to the supplier device. + * * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and * will cause NULL to be returned upfront. @@ -228,7 +234,12 @@ struct device_link *device_link_add(struct device *consumer, if (!consumer || !supplier || (flags & DL_FLAG_STATELESS && - flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER | + DL_FLAG_AUTOPROBE_CONSUMER)) || + (flags & DL_FLAG_AUTOPROBE_CONSUMER && + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { @@ -589,6 +600,9 @@ void device_links_driver_bound(struct device *dev) WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); + + if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) + driver_deferred_probe_add(link->consumer); } list_for_each_entry(link, &dev->links.suppliers, c_node) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index aa6a9c613595..2e898cbba79b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -116,7 +116,7 @@ static void deferred_probe_work_func(struct work_struct *work) } static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func); -static void driver_deferred_probe_add(struct device *dev) +void driver_deferred_probe_add(struct device *dev) { mutex_lock(&deferred_probe_mutex); if (list_empty(&dev->p->deferred_probe)) { diff --git a/include/linux/device.h b/include/linux/device.h index 5f49d2eff6ed..0ab0a3a80ec3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -341,6 +341,7 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); +void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); /** @@ -827,12 +828,14 @@ enum device_link_state { * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. + * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) +#define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) /** * struct device_link - Device link representation. -- cgit v1.2.3 From 42bf4152d8a79f89f5456dee63a1f364fbce2dd6 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 29 Jan 2019 11:19:36 +0530 Subject: tee: add supp_nowait flag in tee_context struct This flag indicates that requests in this context should not wait for tee-supplicant daemon to be started if not present and just return with an error code. It is needed for requests which should be non-blocking in nature like ones arising from TEE based kernel drivers or any in kernel api that uses TEE internal client interface. Signed-off-by: Sumit Garg Reviewed-by: Daniel Thompson Signed-off-by: Jens Wiklander --- drivers/tee/optee/supp.c | 10 +++++++++- drivers/tee/tee_core.c | 13 +++++++++++++ include/linux/tee_drv.h | 6 ++++++ 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index 43626e15703a..92f56b8645e3 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -88,10 +88,18 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, { struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_supp *supp = &optee->supp; - struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL); + struct optee_supp_req *req; bool interruptable; u32 ret; + /* + * Return in case there is no supplicant available and + * non-blocking request. + */ + if (!supp->ctx && ctx->supp_nowait) + return TEEC_ERROR_COMMUNICATION; + + req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return TEEC_ERROR_OUT_OF_MEMORY; diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 7b2bb4c50058..adf2588282fc 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -106,6 +106,11 @@ static int tee_open(struct inode *inode, struct file *filp) if (IS_ERR(ctx)) return PTR_ERR(ctx); + /* + * Default user-space behaviour is to wait for tee-supplicant + * if not present for any requests in this context. + */ + ctx->supp_nowait = false; filp->private_data = ctx; return 0; } @@ -982,6 +987,14 @@ tee_client_open_context(struct tee_context *start, } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM); put_device(put_dev); + /* + * Default behaviour for in kernel client is to not wait for + * tee-supplicant if not present for any requests in this context. + * Also this flag could be configured again before call to + * tee_client_open_session() if any in kernel client requires + * different behaviour. + */ + ctx->supp_nowait = true; return ctx; } EXPORT_SYMBOL_GPL(tee_client_open_context); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 6cfe05893a76..5076502c07d7 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -47,6 +47,11 @@ struct tee_shm_pool; * @releasing: flag that indicates if context is being released right now. * It is needed to break circular dependency on context during * shared memory release. + * @supp_nowait: flag that indicates that requests in this context should not + * wait for tee-supplicant daemon to be started if not present + * and just return with an error code. It is needed for requests + * that arises from TEE based kernel drivers that should be + * non-blocking in nature. */ struct tee_context { struct tee_device *teedev; @@ -54,6 +59,7 @@ struct tee_context { void *data; struct kref refcount; bool releasing; + bool supp_nowait; }; struct tee_param_memref { -- cgit v1.2.3 From 0fc1db9d105915021260eb241661b8e96f5c0f1a Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 29 Jan 2019 11:19:35 +0530 Subject: tee: add bus driver framework for TEE based devices Introduce a generic TEE bus driver concept for TEE based kernel drivers which would like to communicate with TEE based devices/services. Also add support in module device table for these new TEE based devices. In this TEE bus concept, devices/services are identified via Universally Unique Identifier (UUID) and drivers register a table of device UUIDs which they can support. So this TEE bus framework registers following apis: - match(): Iterates over the driver UUID table to find a corresponding match for device UUID. If a match is found, then this particular device is probed via corresponding probe api registered by the driver. This process happens whenever a device or a driver is registered with TEE bus. - uevent(): Notifies user-space (udev) whenever a new device is registered on this bus for auto-loading of modularized drivers. Also this framework allows for device enumeration to be specific to corresponding TEE implementation like OP-TEE etc. Signed-off-by: Sumit Garg Reviewed-by: Daniel Thompson Reviewed-by: Bhupesh Sharma Signed-off-by: Jens Wiklander --- drivers/tee/tee_core.c | 54 ++++++++++++++++++++++++++++++++++++--- include/linux/mod_devicetable.h | 9 +++++++ include/linux/tee_drv.h | 32 ++++++++++++++++++++++- scripts/mod/devicetable-offsets.c | 3 +++ scripts/mod/file2alias.c | 19 ++++++++++++++ 5 files changed, 112 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index adf2588282fc..25f3b9cc8908 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -15,7 +15,6 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ #include -#include #include #include #include @@ -1040,6 +1039,39 @@ int tee_client_invoke_func(struct tee_context *ctx, } EXPORT_SYMBOL_GPL(tee_client_invoke_func); +static int tee_client_device_match(struct device *dev, + struct device_driver *drv) +{ + const struct tee_client_device_id *id_table; + struct tee_client_device *tee_device; + + id_table = to_tee_client_driver(drv)->id_table; + tee_device = to_tee_client_device(dev); + + while (!uuid_is_null(&id_table->uuid)) { + if (uuid_equal(&tee_device->id.uuid, &id_table->uuid)) + return 1; + id_table++; + } + + return 0; +} + +static int tee_client_device_uevent(struct device *dev, + struct kobj_uevent_env *env) +{ + uuid_t *dev_id = &to_tee_client_device(dev)->id.uuid; + + return add_uevent_var(env, "MODALIAS=tee:%pUb", dev_id); +} + +struct bus_type tee_bus_type = { + .name = "tee", + .match = tee_client_device_match, + .uevent = tee_client_device_uevent, +}; +EXPORT_SYMBOL_GPL(tee_bus_type); + static int __init tee_init(void) { int rc; @@ -1053,18 +1085,32 @@ static int __init tee_init(void) rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee"); if (rc) { pr_err("failed to allocate char dev region\n"); - class_destroy(tee_class); - tee_class = NULL; + goto out_unreg_class; + } + + rc = bus_register(&tee_bus_type); + if (rc) { + pr_err("failed to register tee bus\n"); + goto out_unreg_chrdev; } + return 0; + +out_unreg_chrdev: + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); +out_unreg_class: + class_destroy(tee_class); + tee_class = NULL; + return rc; } static void __exit tee_exit(void) { + bus_unregister(&tee_bus_type); + unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); class_destroy(tee_class); tee_class = NULL; - unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES); } subsys_initcall(tee_init); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f9bd2f34b99f..14eaeeb46f41 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -779,4 +779,13 @@ struct typec_device_id { kernel_ulong_t driver_data; }; +/** + * struct tee_client_device_id - tee based device identifier + * @uuid: For TEE based client devices we use the device uuid as + * the identifier. + */ +struct tee_client_device_id { + uuid_t uuid; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 5076502c07d7..56d7f1b4516d 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -15,11 +15,14 @@ #ifndef __TEE_DRV_H #define __TEE_DRV_H -#include +#include #include #include #include +#include #include +#include +#include /* * The file describes the API provided by the generic TEE driver to the @@ -544,4 +547,31 @@ static inline bool tee_param_is_memref(struct tee_param *param) } } +extern struct bus_type tee_bus_type; + +/** + * struct tee_client_device - tee based device + * @id: device identifier + * @dev: device structure + */ +struct tee_client_device { + struct tee_client_device_id id; + struct device dev; +}; + +#define to_tee_client_device(d) container_of(d, struct tee_client_device, dev) + +/** + * struct tee_client_driver - tee client driver + * @id_table: device id table supported by this driver + * @driver: driver structure + */ +struct tee_client_driver { + const struct tee_client_device_id *id_table; + struct device_driver driver; +}; + +#define to_tee_client_driver(d) \ + container_of(d, struct tee_client_driver, driver) + #endif /*__TEE_DRV_H*/ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 293004499b4d..160718383a71 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -225,5 +225,8 @@ int main(void) DEVID_FIELD(typec_device_id, svid); DEVID_FIELD(typec_device_id, mode); + DEVID(tee_client_device_id); + DEVID_FIELD(tee_client_device_id, uuid); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index a37af7d71973..d0e41723627f 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -37,6 +37,9 @@ typedef unsigned char __u8; typedef struct { __u8 b[16]; } uuid_le; +typedef struct { + __u8 b[16]; +} uuid_t; /* Big exception to the "don't include kernel headers into userspace, which * even potentially has different endianness and word sizes, since @@ -1287,6 +1290,21 @@ static int do_typec_entry(const char *filename, void *symval, char *alias) return 1; } +/* Looks like: tee:uuid */ +static int do_tee_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, tee_client_device_id, uuid); + + sprintf(alias, "tee:%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.b[0], uuid.b[1], uuid.b[2], uuid.b[3], uuid.b[4], + uuid.b[5], uuid.b[6], uuid.b[7], uuid.b[8], uuid.b[9], + uuid.b[10], uuid.b[11], uuid.b[12], uuid.b[13], uuid.b[14], + uuid.b[15]); + + add_wildcard(alias); + return 1; +} + /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { @@ -1357,6 +1375,7 @@ static const struct devtable devtable[] = { {"fslmc", SIZE_fsl_mc_device_id, do_fsl_mc_entry}, {"tbsvc", SIZE_tb_service_id, do_tbsvc_entry}, {"typec", SIZE_typec_device_id, do_typec_entry}, + {"tee", SIZE_tee_client_device_id, do_tee_entry}, }; /* Create MODULE_ALIAS() statements. -- cgit v1.2.3 From d83525ca62cf8ebe3271d14c36fb900c294274a2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:04 -0800 Subject: bpf: introduce bpf_spin_lock Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let bpf program serialize access to other variables. Example: struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key); if (val) { bpf_spin_lock(&val->lock); val->cnt++; bpf_spin_unlock(&val->lock); } Restrictions and safety checks: - bpf_spin_lock is only allowed inside HASH and ARRAY maps. - BTF description of the map is mandatory for safety analysis. - bpf program can take one bpf_spin_lock at a time, since two or more can cause dead locks. - only one 'struct bpf_spin_lock' is allowed per map element. It drastically simplifies implementation yet allows bpf program to use any number of bpf_spin_locks. - when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed. - bpf program must bpf_spin_unlock() before return. - bpf program can access 'struct bpf_spin_lock' only via bpf_spin_lock()/bpf_spin_unlock() helpers. - load/store into 'struct bpf_spin_lock lock;' field is not allowed. - to use bpf_spin_lock() helper the BTF description of map value must be a struct and have 'struct bpf_spin_lock anyname;' field at the top level. Nested lock inside another struct is not allowed. - syscall map_lookup doesn't copy bpf_spin_lock field to user space. - syscall map_update and program map_update do not update bpf_spin_lock field. - bpf_spin_lock cannot be on the stack or inside networking packet. bpf_spin_lock can only be inside HASH or ARRAY map value. - bpf_spin_lock is available to root only and to all program types. - bpf_spin_lock is not allowed in inner maps of map-in-map. - ld_abs is not allowed inside spin_lock-ed region. - tracing progs and socket filter progs cannot use bpf_spin_lock due to insufficient preemption checks Implementation details: - cgroup-bpf class of programs can nest with xdp/tc programs. Hence bpf_spin_lock is equivalent to spin_lock_irqsave. Other solutions to avoid nested bpf_spin_lock are possible. Like making sure that all networking progs run with softirq disabled. spin_lock_irqsave is the simplest and doesn't add overhead to the programs that don't use it. - arch_spinlock_t is used when its implemented as queued_spin_lock - archs can force their own arch_spinlock_t - on architectures where queued_spin_lock is not available and sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used. - presence of bpf_spin_lock inside map value could have been indicated via extra flag during map_create, but specifying it via BTF is cleaner. It provides introspection for map key/value and reduces user mistakes. Next steps: - allow bpf_spin_lock in other map types (like cgroup local storage) - introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to request kernel to grab bpf_spin_lock before rewriting the value. That will serialize access to map elements. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 37 +++++++++- include/linux/bpf_verifier.h | 1 + include/linux/btf.h | 1 + include/uapi/linux/bpf.h | 7 +- kernel/Kconfig.locks | 3 + kernel/bpf/arraymap.c | 7 +- kernel/bpf/btf.c | 42 +++++++++++ kernel/bpf/core.c | 2 + kernel/bpf/hashtab.c | 21 +++--- kernel/bpf/helpers.c | 80 ++++++++++++++++++++ kernel/bpf/map_in_map.c | 5 ++ kernel/bpf/syscall.c | 21 +++++- kernel/bpf/verifier.c | 169 ++++++++++++++++++++++++++++++++++++++++++- net/core/filter.c | 16 +++- 14 files changed, 386 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0394f1f9213b..2ae615b48bb8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -72,14 +72,15 @@ struct bpf_map { u32 value_size; u32 max_entries; u32 map_flags; - u32 pages; + int spin_lock_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; + u32 pages; bool unpriv_array; - /* 55 bytes hole */ + /* 51 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -91,6 +92,34 @@ struct bpf_map { char name[BPF_OBJ_NAME_LEN]; }; +static inline bool map_value_has_spin_lock(const struct bpf_map *map) +{ + return map->spin_lock_off >= 0; +} + +static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +{ + if (likely(!map_value_has_spin_lock(map))) + return; + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; +} + +/* copy everything but bpf_spin_lock */ +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + if (unlikely(map_value_has_spin_lock(map))) { + u32 off = map->spin_lock_off; + + memcpy(dst, src, off); + memcpy(dst + off + sizeof(struct bpf_spin_lock), + src + off + sizeof(struct bpf_spin_lock), + map->value_size - off - sizeof(struct bpf_spin_lock)); + } else { + memcpy(dst, src, map->value_size); + } +} + struct bpf_offload_dev; struct bpf_offloaded_map; @@ -162,6 +191,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ }; /* type of values returned from helper functions */ @@ -879,7 +909,8 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; extern const struct bpf_func_proto bpf_sk_redirect_map_proto; - +extern const struct bpf_func_proto bpf_spin_lock_proto; +extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; /* Shared helpers among cBPF and eBPF. */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0620e418dde5..69f7a3449eda 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -148,6 +148,7 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; u32 curframe; + u32 active_spin_lock; bool speculative; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 12502e25e767..455d31b55828 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -50,6 +50,7 @@ u32 btf_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 60b99b730a41..86f7c438d40f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2422,7 +2422,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3058,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 84d882f3e299..fbba478ae522 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP +config BPF_ARCH_SPINLOCK + bool + config ARCH_USE_QUEUED_RWLOCKS bool diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 25632a75d630..d6d979910a2a 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -270,9 +270,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + - array->elem_size * (index & array->index_mask), - value, map->value_size); + copy_map_value(map, + array->value + + array->elem_size * (index & array->index_mask), + value); return 0; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3d661f0606fe..7019c1f05cab 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + static bool btf_type_is_array(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; @@ -2045,6 +2050,43 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + const struct btf_member *member; + u32 i, off = -ENOENT; + + if (!__btf_type_is_struct(t)) + return -EINVAL; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + if (!__btf_type_is_struct(member_type)) + continue; + if (member_type->size != sizeof(struct bpf_spin_lock)) + continue; + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), + "bpf_spin_lock")) + continue; + if (off != -ENOENT) + /* only one 'struct bpf_spin_lock' is allowed */ + return -E2BIG; + off = btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; + off /= 8; + if (off % __alignof__(struct bpf_spin_lock)) + /* valid struct bpf_spin_lock will be 4 byte aligned */ + return -EINVAL; + } + return off; +} + static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f13c543b7b36..ef88b167959d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2002,6 +2002,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_spin_lock_proto __weak; +const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..6d3b22c5ad68 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) BITS_PER_LONG == 64; } -static u32 htab_size_value(const struct bpf_htab *htab, bool percpu) -{ - u32 size = htab->map.value_size; - - if (percpu || fd_htab_map_needs_adjust(htab)) - size = round_up(size, 8); - return size; -} - static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, struct htab_elem *old_elem) { - u32 size = htab_size_value(htab, percpu); + u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-ENOMEM); goto dec_count; } + check_and_init_map_lock(&htab->map, + l_new->key + round_up(key_size, 8)); } memcpy(l_new->key, key, key_size); if (percpu) { + size = round_up(size, 8); if (prealloc) { pptr = htab_elem_get_ptr(l_new, key_size); } else { @@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); - } else { + } else if (fd_htab_map_needs_adjust(htab)) { + size = round_up(size, 8); memcpy(l_new->key + round_up(key_size, 8), value, size); + } else { + copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); } l_new->hash = hash; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a74972b07e74..fbe544761628 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg2_type = ARG_CONST_SIZE, }; +#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + union { + __u32 val; + arch_spinlock_t lock; + } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; + + compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); + BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); + BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); + arch_spin_lock(l); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + + arch_spin_unlock(l); +} + +#else + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); + do { + atomic_cond_read_relaxed(l, !VAL); + } while (atomic_xchg(l, 1)); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + atomic_set_release(l, 0); +} + +#endif + +static DEFINE_PER_CPU(unsigned long, irqsave_flags); + +notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + local_irq_save(flags); + __bpf_spin_lock(lock); + __this_cpu_write(irqsave_flags, flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_lock_proto = { + .func = bpf_spin_lock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + flags = __this_cpu_read(irqsave_flags); + __bpf_spin_unlock(lock); + local_irq_restore(flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_unlock_proto = { + .func = bpf_spin_unlock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 52378d3e34b3..583346a0ab29 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } + if (map_value_has_spin_lock(inner_map)) { + fdput(f); + return ERR_PTR(-ENOTSUPP); + } + inner_map_meta_size = sizeof(*inner_map_meta); /* In some cases verifier needs to access beyond just base map. */ if (inner_map->ops == &array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..ebf0a673cb83 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; @@ -478,6 +478,21 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf, if (!value_type || value_size != map->value_size) return -EINVAL; + map->spin_lock_off = btf_find_spin_lock(btf, value_type); + + if (map_value_has_spin_lock(map)) { + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -ENOTSUPP; + if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > + map->value_size) { + WARN_ONCE(1, + "verifier bug spin_lock_off %d value_size %d\n", + map->spin_lock_off, map->value_size); + return -EFAULT; + } + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type); @@ -542,6 +557,8 @@ static int map_create(union bpf_attr *attr) map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; + } else { + map->spin_lock_off = -EINVAL; } err = security_bpf_map_alloc(map); @@ -740,7 +757,7 @@ static int map_lookup_elem(union bpf_attr *attr) err = -ENOENT; } else { err = 0; - memcpy(value, ptr, value_size); + copy_map_value(map, value, ptr); } rcu_read_unlock(); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8c1c21cd50b4..38892bdee651 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -213,6 +213,7 @@ struct bpf_call_arg_meta { s64 msize_smax_value; u64 msize_umax_value; int ptr_id; + int func_id; }; static DEFINE_MUTEX(bpf_verifier_lock); @@ -351,6 +352,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg) return type_is_refcounted(reg->type); } +static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) +{ + return reg->type == PTR_TO_MAP_VALUE && + map_value_has_spin_lock(reg->map_ptr); +} + static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) { return type_is_refcounted_or_null(reg->type); @@ -712,6 +719,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, } dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; + dst_state->active_spin_lock = src->active_spin_lock; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1483,6 +1491,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, if (err) verbose(env, "R%d max value is outside of the array range\n", regno); + + if (map_value_has_spin_lock(reg->map_ptr)) { + u32 lock = reg->map_ptr->spin_lock_off; + + /* if any part of struct bpf_spin_lock can be touched by + * load/store reject this program. + * To check that [x1, x2) overlaps with [y1, y2) + * it is sufficient to check x1 < y2 && y1 < x2. + */ + if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && + lock < reg->umax_value + off + size) { + verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); + return -EACCES; + } + } return err; } @@ -2192,6 +2215,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +/* Implementation details: + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL + * Two bpf_map_lookups (even with the same key) will have different reg->id. + * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after + * value_or_null->value transition, since the verifier only cares about + * the range of access to valid map value pointer and doesn't care about actual + * address of the map element. + * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps + * reg->id > 0 after value_or_null->value transition. By doing so + * two bpf_map_lookups will be considered two different pointers that + * point to different bpf_spin_locks. + * The verifier allows taking only one bpf_spin_lock at a time to avoid + * dead-locks. + * Since only one bpf_spin_lock is allowed the checks are simpler than + * reg_is_refcounted() logic. The verifier needs to remember only + * one spin_lock instead of array of acquired_refs. + * cur_state->active_spin_lock remembers which map value element got locked + * and clears it after bpf_spin_unlock. + */ +static int process_spin_lock(struct bpf_verifier_env *env, int regno, + bool is_lock) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_verifier_state *cur = env->cur_state; + bool is_const = tnum_is_const(reg->var_off); + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "R%d is not a pointer to map_value\n", regno); + return -EINVAL; + } + if (!is_const) { + verbose(env, + "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map->btf) { + verbose(env, + "map '%s' has to have BTF in order to use bpf_spin_lock\n", + map->name); + return -EINVAL; + } + if (!map_value_has_spin_lock(map)) { + if (map->spin_lock_off == -E2BIG) + verbose(env, + "map '%s' has more than one 'struct bpf_spin_lock'\n", + map->name); + else if (map->spin_lock_off == -ENOENT) + verbose(env, + "map '%s' doesn't have 'struct bpf_spin_lock'\n", + map->name); + else + verbose(env, + "map '%s' is not a struct type or bpf_spin_lock is mangled\n", + map->name); + return -EINVAL; + } + if (map->spin_lock_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", + val + reg->off); + return -EINVAL; + } + if (is_lock) { + if (cur->active_spin_lock) { + verbose(env, + "Locking two bpf_spin_locks are not allowed\n"); + return -EINVAL; + } + cur->active_spin_lock = reg->id; + } else { + if (!cur->active_spin_lock) { + verbose(env, "bpf_spin_unlock without taking a lock\n"); + return -EINVAL; + } + if (cur->active_spin_lock != reg->id) { + verbose(env, "bpf_spin_unlock of different lock\n"); + return -EINVAL; + } + cur->active_spin_lock = 0; + } + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return type == ARG_PTR_TO_MEM || @@ -2268,6 +2376,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return -EFAULT; } meta->ptr_id = reg->id; + } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + if (meta->func_id == BPF_FUNC_spin_lock) { + if (process_spin_lock(env, regno, true)) + return -EACCES; + } else if (meta->func_id == BPF_FUNC_spin_unlock) { + if (process_spin_lock(env, regno, false)) + return -EACCES; + } else { + verbose(env, "verifier internal error\n"); + return -EFAULT; + } } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2887,6 +3006,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } + meta.func_id = func_id; /* check args */ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) @@ -4473,7 +4593,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; } - if (is_null || !reg_is_refcounted(reg)) { + if (is_null || !(reg_is_refcounted(reg) || + reg_may_point_to_spin_lock(reg))) { /* We don't need id from this point onwards anymore, * thus we should better reset it, so that state * pruning has chances to take effect. @@ -4871,6 +4992,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } + if (env->cur_state->active_spin_lock) { + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); + return -EINVAL; + } + if (regs[BPF_REG_6].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -5607,8 +5733,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. - * We don't care about the 'id' value, because nothing - * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL) + * 'id' is not compared, since it's only used for maps with + * bpf_spin_lock inside map element and in such cases if + * the rest of the prog is valid for one map element then + * it's valid for all map elements regardless of the key + * used in bpf_map_lookup() */ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && @@ -5811,6 +5940,9 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; + if (old->active_spin_lock != cur->active_spin_lock) + return false; + /* for states to be equal callsites have to be the same * and all frame states need to be equivalent */ @@ -6229,6 +6361,12 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock && + (insn->src_reg == BPF_PSEUDO_CALL || + insn->imm != BPF_FUNC_spin_unlock)) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else @@ -6259,6 +6397,11 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock) { + verbose(env, "bpf_spin_unlock is missing\n"); + return -EINVAL; + } + if (state->curframe) { /* exit from nested function */ env->prev_insn_idx = env->insn_idx; @@ -6356,6 +6499,19 @@ static int check_map_prealloc(struct bpf_map *map) !(map->map_flags & BPF_F_NO_PREALLOC); } +static bool is_tracing_prog_type(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return true; + default: + return false; + } +} + static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog) @@ -6378,6 +6534,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if ((is_tracing_prog_type(prog->type) || + prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && + map_value_has_spin_lock(map)) { + verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } + if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); diff --git a/net/core/filter.c b/net/core/filter.c index 41984ad4b9b4..3a49f68eda10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5314,10 +5314,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } -- cgit v1.2.3 From 96049f3afd50fe8db69fa0068cdca822e747b1e4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 31 Jan 2019 15:40:09 -0800 Subject: bpf: introduce BPF_F_LOCK flag Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands and for map_update() helper function. In all these cases take a lock of existing element (which was provided in BTF description) before copying (in or out) the rest of map value. Implementation details that are part of uapi: Array: The array map takes the element lock for lookup/update. Hash: hash map also takes the lock for lookup/update and tries to avoid the bucket lock. If old element exists it takes the element lock and updates the element in place. If element doesn't exist it allocates new one and inserts into hash table while holding the bucket lock. In rare case the hashmap has to take both the bucket lock and the element lock to update old value in place. Cgroup local storage: It is similar to array. update in place and lookup are done with lock taken. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 24 ++++++++++++++++-------- kernel/bpf/hashtab.c | 42 +++++++++++++++++++++++++++++++++++++++--- kernel/bpf/helpers.c | 16 ++++++++++++++++ kernel/bpf/local_storage.c | 14 +++++++++++++- kernel/bpf/syscall.c | 25 +++++++++++++++++++++++-- 7 files changed, 110 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ae615b48bb8..bd169a7bcc93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -119,6 +119,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) memcpy(dst, src, map->value_size); } } +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src); struct bpf_offload_dev; struct bpf_offloaded_map; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 86f7c438d40f..1777fa0c61e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index d6d979910a2a..c72e0d8e1e65 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; + char *val; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -262,18 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (unlikely(map_flags == BPF_NOEXIST)) + if (unlikely(map_flags & BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + if (unlikely((map_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) + return -EINVAL; + + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); - else - copy_map_value(map, - array->value + - array->elem_size * (index & array->index_mask), - value); + } else { + val = array->value + + array->elem_size * (index & array->index_mask); + if (map_flags & BPF_F_LOCK) + copy_map_value_locked(map, val, value, false); + else + copy_map_value(map, val, value); + } return 0; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 6d3b22c5ad68..937776531998 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -804,11 +804,11 @@ dec_count: static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (l_old && map_flags == BPF_NOEXIST) + if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) /* elem already exists */ return -EEXIST; - if (!l_old && map_flags == BPF_EXIST) + if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) /* elem doesn't exist, cannot update it */ return -ENOENT; @@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, u32 key_size, hash; int ret; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, b = __select_bucket(htab, hash); head = &b->head; + if (unlikely(map_flags & BPF_F_LOCK)) { + if (unlikely(!map_value_has_spin_lock(map))) + return -EINVAL; + /* find an element without taking the bucket lock */ + l_old = lookup_nulls_elem_raw(head, hash, key, key_size, + htab->n_buckets); + ret = check_flags(htab, l_old, map_flags); + if (ret) + return ret; + if (l_old) { + /* grab the element lock and update value in place */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + return 0; + } + /* fall through, grab the bucket lock and lookup again. + * 99.9% chance that the element won't be found, + * but second lookup under lock has to be done. + */ + } + /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags); @@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; + if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { + /* first lookup without the bucket lock didn't find the element, + * but second lookup with the bucket lock found it. + * This case is highly unlikely, but has to be dealt with: + * grab the element lock in addition to the bucket lock + * and update element in place + */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + ret = 0; + goto err; + } + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, l_old); if (IS_ERR(l_new)) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index fbe544761628..a411fc17d265 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = { .arg1_type = ARG_PTR_TO_SPIN_LOCK, }; +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src) +{ + struct bpf_spin_lock *lock; + + if (lock_src) + lock = src + map->spin_lock_off; + else + lock = dst + map->spin_lock_off; + preempt_disable(); + ____bpf_spin_lock(lock); + copy_map_value(map, dst, src); + ____bpf_spin_unlock(lock); + preempt_enable(); +} + #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 0295427f06e2..6b572e2de7fb 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags != BPF_ANY && flags != BPF_EXIST) + if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) + return -EINVAL; + + if (unlikely(flags & BPF_NOEXIST)) + return -EINVAL; + + if (unlikely((flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, @@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, if (!storage) return -ENOENT; + if (flags & BPF_F_LOCK) { + copy_map_value_locked(map, storage->buf->data, value, false); + return 0; + } + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + map->value_size, __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b29e6dc44650..0834958f1dc4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags static int map_lookup_elem(union bpf_attr *attr) { @@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr) if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL; + if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr) err = -ENOENT; } else { err = 0; - copy_map_value(map, value, ptr); + if (attr->flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); } rcu_read_unlock(); } @@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); -- cgit v1.2.3 From b8580e9de48bf32b884910d22330ef2fa027cf01 Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Fri, 1 Feb 2019 17:11:14 -0600 Subject: PCI: Add HXT vendor ID Add the HXT vendor ID to pci_ids.h. Signed-off-by: Shunyong Yang [bhelgaas: split to separate patch] Signed-off-by: Bjorn Helgaas Reviewed-by: Sinan Kaya CC: Joey Zheng --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5eaf39dbc388..26420e619dd7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2573,6 +2573,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_HXT 0x1dbf + #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 -- cgit v1.2.3 From 0ce26a1c31ca928df4dfc7504c8898b71ff9f5d5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Feb 2019 17:24:52 -0600 Subject: PCI: Move Rohm Vendor ID to generic list Move the Rohm Vendor ID to pci_ids.h instead of defining it in several drivers. Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Acked-by: Mark Brown Acked-by: Linus Walleij --- drivers/dma/pch_dma.c | 1 - drivers/gpio/gpio-ml-ioh.c | 2 -- drivers/gpio/gpio-pch.c | 1 - drivers/i2c/busses/i2c-eg20t.c | 1 - drivers/misc/pch_phub.c | 1 - drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 7 ++----- drivers/spi/spi-topcliff-pch.c | 1 - drivers/tty/serial/pch_uart.c | 2 -- drivers/usb/gadget/udc/pch_udc.c | 1 - include/linux/pci_ids.h | 2 ++ 10 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index afd8f27bda96..538b6e0e17bb 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -972,7 +972,6 @@ static void pch_dma_remove(struct pci_dev *pdev) } /* PCI Device ID of DMA device */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810 #define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815 #define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c index 51c7d1b84c2e..0c076dce9e17 100644 --- a/drivers/gpio/gpio-ml-ioh.c +++ b/drivers/gpio/gpio-ml-ioh.c @@ -31,8 +31,6 @@ #define IOH_IRQ_BASE 0 -#define PCI_VENDOR_ID_ROHM 0x10DB - struct ioh_reg_comn { u32 ien; u32 istatus; diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index ee79e5f88b5a..1d99293096f2 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -437,7 +437,6 @@ static int __maybe_unused pch_gpio_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(pch_gpio_pm_ops, pch_gpio_suspend, pch_gpio_resume); -#define PCI_VENDOR_ID_ROHM 0x10DB static const struct pci_device_id pch_gpio_pcidev_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x8014) }, diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 835d54ac2971..231675b10376 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -177,7 +177,6 @@ static wait_queue_head_t pch_event; static DEFINE_MUTEX(pch_mutex); /* Definition for ML7213 by LAPIS Semiconductor */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_I2C 0x802D #define PCI_DEVICE_ID_ML7223_I2C 0x8010 #define PCI_DEVICE_ID_ML7831_I2C 0x8817 diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c index 540845651b8c..309703e9c42e 100644 --- a/drivers/misc/pch_phub.c +++ b/drivers/misc/pch_phub.c @@ -64,7 +64,6 @@ #define CLKCFG_UARTCLKSEL (1 << 18) /* Macros for ML7213 */ -#define PCI_VENDOR_ID_ROHM 0x10db #define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A /* Macros for ML7223 */ diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 43c0c10dfeb7..3a4225837049 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -27,7 +27,6 @@ #define DRV_VERSION "1.01" const char pch_driver_version[] = DRV_VERSION; -#define PCI_DEVICE_ID_INTEL_IOH1_GBE 0x8802 /* Pci device ID */ #define PCH_GBE_MAR_ENTRIES 16 #define PCH_GBE_SHORT_PKT 64 #define DSC_INIT16 0xC000 @@ -37,11 +36,9 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_PCI_BAR 1 #define PCH_GBE_RESERVE_MEMORY 0x200000 /* 2MB */ -/* Macros for ML7223 */ -#define PCI_VENDOR_ID_ROHM 0x10db -#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 +#define PCI_DEVICE_ID_INTEL_IOH1_GBE 0x8802 -/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 #define PCI_DEVICE_ID_ROHM_ML7831_GBE 0x8802 #define PCH_GBE_TX_WEIGHT 64 diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 97d137591b18..d794180e83dc 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -92,7 +92,6 @@ #define PCH_MAX_SPBR 1023 /* Definition for ML7213/ML7223/ML7831 by LAPIS Semiconductor */ -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_SPI 0x802c #define PCI_DEVICE_ID_ML7223_SPI 0x800F #define PCI_DEVICE_ID_ML7831_SPI 0x8816 diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 9ed121f08a54..6157213a8359 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -192,8 +192,6 @@ enum { #define PCH_UART_HAL_LOOP (PCH_UART_MCR_LOOP) #define PCH_UART_HAL_AFE (PCH_UART_MCR_AFE) -#define PCI_VENDOR_ID_ROHM 0x10DB - #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) #define DEFAULT_UARTCLK 1843200 /* 1.8432 MHz */ diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 55c8c8abeacd..cded51f36fc1 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -368,7 +368,6 @@ struct pch_udc_dev { #define PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC 0x0939 #define PCI_DEVICE_ID_INTEL_EG20T_UDC 0x8808 -#define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_IOH_UDC 0x801D #define PCI_DEVICE_ID_ML7831_IOH_UDC 0x8808 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 26420e619dd7..70e86148cb1e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1140,6 +1140,8 @@ #define PCI_VENDOR_ID_TCONRAD 0x10da #define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508 +#define PCI_VENDOR_ID_ROHM 0x10db + #define PCI_VENDOR_ID_NVIDIA 0x10de #define PCI_DEVICE_ID_NVIDIA_TNT 0x0020 #define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028 -- cgit v1.2.3 From f38ab20b749da84e3df1f8c9240ddc791b0d5983 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 20 Dec 2018 14:59:33 +0800 Subject: iio: st_accel: use ACPI orientation data Platform-specific ST accelerometer mount matrix information can be provided by returning a package of 6 integers from the ACPI _ONT method. This has been seen on Acer products such as Veriton Z4860G, Z6860G and A890, which include a ST SMO8840 sensor. We have also confirmed experimentally that the Windows driver uses such information. The _ONT data format was explained by a ST vendor contact. However, strangely enough, the _ONT transformations must be applied after first applying another mount matrix which we determined experimentally. ST have not commented on why this is the case, but we imagine that perhaps earlier devices (before _ONT was introduced) required this translation and hence it became 'standard.' Interpret the _ONT data and export the equivalent mount matrix to userspace. If no _ONT data is present, no mount matrix is exported. Signed-off-by: Daniel Drake Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 171 +++++++++++++++++++++++++++++++++- include/linux/iio/common/st_sensors.h | 1 + 2 files changed, 171 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index f7b471121508..a3c0916479fa 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -918,12 +919,167 @@ static const struct iio_trigger_ops st_accel_trigger_ops = { #define ST_ACCEL_TRIGGER_OPS NULL #endif +static const struct iio_mount_matrix * +get_mount_matrix(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + struct st_sensor_data *adata = iio_priv(indio_dev); + + return adata->mount_matrix; +} + +static const struct iio_chan_spec_ext_info mount_matrix_ext_info[] = { + IIO_MOUNT_MATRIX(IIO_SHARED_BY_ALL, get_mount_matrix), + { }, +}; + +/* Read ST-specific _ONT orientation data from ACPI and generate an + * appropriate mount matrix. + */ +static int apply_acpi_orientation(struct iio_dev *indio_dev, + struct iio_chan_spec *channels) +{ +#ifdef CONFIG_ACPI + struct st_sensor_data *adata = iio_priv(indio_dev); + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_device *adev; + union acpi_object *ont; + union acpi_object *elements; + acpi_status status; + int ret = -EINVAL; + unsigned int val; + int i, j; + int final_ont[3][3] = { { 0 }, }; + + /* For some reason, ST's _ONT translation does not apply directly + * to the data read from the sensor. Another translation must be + * performed first, as described by the matrix below. Perhaps + * ST required this specific translation for the first product + * where the device was mounted? + */ + const int default_ont[3][3] = { + { 0, 1, 0 }, + { -1, 0, 0 }, + { 0, 0, -1 }, + }; + + + adev = ACPI_COMPANION(adata->dev); + if (!adev) + return 0; + + /* Read _ONT data, which should be a package of 6 integers. */ + status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer); + if (status == AE_NOT_FOUND) { + return 0; + } else if (ACPI_FAILURE(status)) { + dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n", + status); + return status; + } + + ont = buffer.pointer; + if (ont->type != ACPI_TYPE_PACKAGE || ont->package.count != 6) + goto out; + + /* The first 3 integers provide axis order information. + * e.g. 0 1 2 would indicate normal X,Y,Z ordering. + * e.g. 1 0 2 indicates that data arrives in order Y,X,Z. + */ + elements = ont->package.elements; + for (i = 0; i < 3; i++) { + if (elements[i].type != ACPI_TYPE_INTEGER) + goto out; + + val = elements[i].integer.value; + if (val < 0 || val > 2) + goto out; + + /* Avoiding full matrix multiplication, we simply reorder the + * columns in the default_ont matrix according to the + * ordering provided by _ONT. + */ + final_ont[0][i] = default_ont[0][val]; + final_ont[1][i] = default_ont[1][val]; + final_ont[2][i] = default_ont[2][val]; + } + + /* The final 3 integers provide sign flip information. + * 0 means no change, 1 means flip. + * e.g. 0 0 1 means that Z data should be sign-flipped. + * This is applied after the axis reordering from above. + */ + elements += 3; + for (i = 0; i < 3; i++) { + if (elements[i].type != ACPI_TYPE_INTEGER) + goto out; + + val = elements[i].integer.value; + if (val != 0 && val != 1) + goto out; + if (!val) + continue; + + /* Flip the values in the indicated column */ + final_ont[0][i] *= -1; + final_ont[1][i] *= -1; + final_ont[2][i] *= -1; + } + + /* Convert our integer matrix to a string-based iio_mount_matrix */ + adata->mount_matrix = devm_kmalloc(&indio_dev->dev, + sizeof(*adata->mount_matrix), + GFP_KERNEL); + if (!adata->mount_matrix) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + int matrix_val = final_ont[i][j]; + char *str_value; + + switch (matrix_val) { + case -1: + str_value = "-1"; + break; + case 0: + str_value = "0"; + break; + case 1: + str_value = "1"; + break; + default: + goto out; + } + adata->mount_matrix->rotation[i * 3 + j] = str_value; + } + } + + /* Expose the mount matrix via ext_info */ + for (i = 0; i < indio_dev->num_channels; i++) + channels[i].ext_info = mount_matrix_ext_info; + + ret = 0; + dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n"); + +out: + kfree(buffer.pointer); + return ret; +#else /* !CONFIG_ACPI */ + return 0; +#endif +} + int st_accel_common_probe(struct iio_dev *indio_dev) { struct st_sensor_data *adata = iio_priv(indio_dev); struct st_sensors_platform_data *pdata = (struct st_sensors_platform_data *)adata->dev->platform_data; int irq = adata->get_irq_data_ready(indio_dev); + struct iio_chan_spec *channels; + size_t channels_size; int err; indio_dev->modes = INDIO_DIRECT_MODE; @@ -942,9 +1098,22 @@ int st_accel_common_probe(struct iio_dev *indio_dev) adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS; adata->multiread_bit = adata->sensor_settings->multi_read_bit; - indio_dev->channels = adata->sensor_settings->ch; indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS; + channels_size = indio_dev->num_channels * sizeof(struct iio_chan_spec); + channels = devm_kmemdup(&indio_dev->dev, + adata->sensor_settings->ch, + channels_size, GFP_KERNEL); + if (!channels) { + err = -ENOMEM; + goto st_accel_power_off; + } + + if (apply_acpi_orientation(indio_dev, channels)) + dev_warn(&indio_dev->dev, + "failed to apply ACPI orientation data: %d\n", err); + + indio_dev->channels = channels; adata->current_fullscale = (struct st_sensor_fullscale_avl *) &adata->sensor_settings->fs.fs_avl[0]; adata->odr = adata->sensor_settings->odr.odr_avl[0].hz; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 8092b8e7f37e..45e9667f0a8c 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -260,6 +260,7 @@ struct st_sensor_settings { struct st_sensor_data { struct device *dev; struct iio_trigger *trig; + struct iio_mount_matrix *mount_matrix; struct st_sensor_settings *sensor_settings; struct st_sensor_fullscale_avl *current_fullscale; struct regulator *vdd; -- cgit v1.2.3 From d5d30d5a5c60628de5e77e3f292a8f9012d51350 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 2 Feb 2019 16:35:26 -0800 Subject: libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family As Dexuan reports the NVDIMM_FAMILY_HYPERV platform is incompatible with the existing Linux namespace implementation because it uses NSLABEL_FLAG_LOCAL for x1-width PMEM interleave sets. Quirk it as an platform / DIMM that does not provide BLK-aperture access. Allow the libnvdimm core to assume no potential for aliasing. In case other implementations make the same mistake, provide a "noblk" module parameter to force-enable the quirk. Link: https://lkml.kernel.org/r/PU1P153MB0169977604493B82B662A01CBF920@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 4 ++++ drivers/nvdimm/dimm_devs.c | 7 +++++++ drivers/nvdimm/label.c | 3 +++ drivers/nvdimm/namespace_devs.c | 6 ++++++ drivers/nvdimm/region_devs.c | 7 +++++++ include/linux/libnvdimm.h | 2 ++ 6 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4a7e8b1fa43b..811c399a3a76 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2016,6 +2016,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } + /* Quirk to ignore LOCAL for labels on HYPERV DIMMs */ + if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) + set_bit(NDD_NOBLK, &flags); + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 4890310df874..553aa78abeee 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -25,6 +26,10 @@ static DEFINE_IDA(dimm_ida); +static bool noblk; +module_param(noblk, bool, 0444); +MODULE_PARM_DESC(noblk, "force disable BLK / local alias support"); + /* * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands @@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; + if (noblk) + flags |= 1 << NDD_NOBLK; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; nvdimm->num_flush = num_flush; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 6d6e9a12150b..f3d753d3169c 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; /* no label, nothing to reserve */ for_each_clear_bit_le(slot, free, nslot) { + struct nvdimm *nvdimm = to_nvdimm(ndd->dev); struct nd_namespace_label *nd_label; struct nd_region *nd_region = NULL; u8 label_uuid[NSLABEL_UUID_LEN]; @@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); flags = __le32_to_cpu(nd_label->flags); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) + flags &= ~NSLABEL_FLAG_LOCAL; nd_label_gen_id(&label_id, label_uuid, flags); res = nvdimm_allocate_dpa(ndd, &label_id, __le64_to_cpu(nd_label->dpa), diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..3677b0c4a33d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2492,6 +2492,12 @@ static int init_active_labels(struct nd_region *nd_region) if (!label_ent) break; label = nd_label_active(ndd, j); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) { + u32 flags = __le32_to_cpu(label->flags); + + flags &= ~NSLABEL_FLAG_LOCAL; + label->flags = __cpu_to_le32(flags); + } label_ent->label = label; mutex_lock(&nd_mapping->lock); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f292..3b58baa44b5c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (test_bit(NDD_UNARMED, &nvdimm->flags)) ro = 1; + + if (test_bit(NDD_NOBLK, &nvdimm->flags) + && dev_type == &nd_blk_device_type) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n", + caller, dev_name(&nvdimm->dev), i); + return NULL; + } } if (dev_type == &nd_blk_device_type) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907..7da406ae3a2b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -42,6 +42,8 @@ enum { NDD_SECURITY_OVERWRITE = 3, /* tracking whether or not there is a pending device reference */ NDD_WORK_PENDING = 4, + /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */ + NDD_NOBLK = 5, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit v1.2.3 From 79a4e91d1bb2a411a4ce2baa93680fa707567003 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 2 Feb 2019 19:50:17 -0800 Subject: device.h: Add __cold to dev_ logging functions Add __cold to the dev_ logging functions similar to the use of __cold in the generic printk function. Using __cold moves all the dev_ logging functions out-of-line possibly improving code locality and runtime performance. Signed-off-by: Joe Perches Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0ab0a3a80ec3..a36830e2d0e5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1384,28 +1384,28 @@ void device_link_remove(void *consumer, struct device *supplier); #ifdef CONFIG_PRINTK -__printf(3, 0) +__printf(3, 0) __cold int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args); -__printf(3, 4) +__printf(3, 4) __cold int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...); -__printf(3, 4) +__printf(3, 4) __cold void dev_printk(const char *level, const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_emerg(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_alert(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_crit(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_err(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_warn(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_notice(const struct device *dev, const char *fmt, ...); -__printf(2, 3) +__printf(2, 3) __cold void _dev_info(const struct device *dev, const char *fmt, ...); #else -- cgit v1.2.3 From dda7a817f2873a0e0b1c7fde1265758f3623daa4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:49 +0200 Subject: net/mlx5: Add XRC transport to ODP device capabilities layout The device capabilities for ODP structure was missing the field for XRC transport so add it here. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..5407db8ba8e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -831,7 +831,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { -- cgit v1.2.3 From 46861e3e88be18846971792b763eaf520a91a802 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:51 +0200 Subject: net/mlx5: Set ODP SRQ support in firmware To avoid compatibility issue with older kernels the firmware doesn't allow SRQ to work with ODP unless kernel asks for it. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 53 ++++++++++++++++++++++++++ include/linux/mlx5/device.h | 3 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 3 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 085e1133b8d5..e38aa206ab6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + /** + * If all bits are cleared we shouldn't try to set it + * or we might fail while trying to access a reserved bit. + */ + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -931,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..0845a227a7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5407db8ba8e1..c5c679390fbd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; -- cgit v1.2.3 From 13c6ee2a921683bae4bb4ba57b1f5b82f49e6b8a Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:48 -0800 Subject: socket: Use old_timeval types for socket timestamps As part of y2038 solution, all internal uses of struct timeval are replaced by struct __kernel_old_timeval and struct compat_timeval by struct old_timeval32. Make socket timestamps use these new types. This is mainly to be able to verify that the kernel build is y2038 safe when such non y2038 safe types are not supported anymore. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: isdn@linux-pingi.de Signed-off-by: David S. Miller --- drivers/isdn/mISDN/socket.c | 2 +- include/linux/skbuff.h | 6 +++--- net/bluetooth/hci_sock.c | 4 ++-- net/compat.c | 6 +++--- net/ipv4/tcp.c | 2 +- net/rds/recv.c | 2 +- net/socket.c | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 15d3ca37669a..4ab8b1b6608f 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -103,7 +103,7 @@ mISDN_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) static inline void mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { - struct timeval tv; + struct __kernel_old_timeval tv; if (_pms(sk)->cmask & MISDN_TIME_STAMP) { skb_get_timestamp(skb, &tv); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c34595374e93..4001611a4c9f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3486,16 +3486,16 @@ static inline ktime_t skb_get_ktime(const struct sk_buff *skb) /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from - * @stamp: pointer to struct timeval to store stamp in + * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. * This function converts the offset back to a struct timeval and stores * it in stamp. */ static inline void skb_get_timestamp(const struct sk_buff *skb, - struct timeval *stamp) + struct __kernel_old_timeval *stamp) { - *stamp = ktime_to_timeval(skb->tstamp); + *stamp = ns_to_kernel_old_timeval(skb->tstamp); } static inline void skb_get_timestampns(const struct sk_buff *skb, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1506e1632394..65228bfa4487 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1383,9 +1383,9 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, if (mask & HCI_CMSG_TSTAMP) { #ifdef CONFIG_COMPAT - struct compat_timeval ctv; + struct old_timeval32 ctv; #endif - struct timeval tv; + struct __kernel_old_timeval tv; void *data; int len; diff --git a/net/compat.c b/net/compat.c index ccf93cd0e49b..9629f053d4fa 100644 --- a/net/compat.c +++ b/net/compat.c @@ -209,8 +209,8 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; struct compat_cmsghdr cmhdr; - struct compat_timeval ctv; - struct compat_timespec cts[3]; + struct old_timeval32 ctv; + struct old_timespec32 cts[3]; int cmlen; if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { @@ -220,7 +220,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat if (!COMPAT_USE_64BIT_TIME) { if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) { - struct timeval *tv = (struct timeval *)data; + struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e29aec59cad1..3ce41b04c0f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1861,7 +1861,7 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb, static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping *tss) { - struct timeval tv; + struct __kernel_old_timeval tv; bool has_timestamping = false; if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { diff --git a/net/rds/recv.c b/net/rds/recv.c index 04e30d63a159..435bf2320cd3 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -549,7 +549,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, if ((inc->i_rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { - struct timeval tv = ktime_to_timeval(inc->i_rx_tstamp); + struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp); ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv); if (ret) diff --git a/net/socket.c b/net/socket.c index 5087f9e40f3a..9cc281cdb9d9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -719,7 +719,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; + struct __kernel_old_timeval tv; skb_get_timestamp(skb, &tv); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv); -- cgit v1.2.3 From 887feae36aee6c08e0dafcdaa5ba921abbb2c56b Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:50 -0800 Subject: socket: Add SO_TIMESTAMP[NS]_NEW Add SO_TIMESTAMP_NEW and SO_TIMESTAMPNS_NEW variants of socket timestamp options. These are the y2038 safe versions of the SO_TIMESTAMP_OLD and SO_TIMESTAMPNS_OLD for all architectures. Note that the format of scm_timestamping.ts[0] is not changed in this patch. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: jejb@parisc-linux.org Cc: ralf@linux-mips.org Cc: rth@twiddle.net Cc: linux-alpha@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-parisc@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: netdev@vger.kernel.org Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 14 ++++++++++++-- arch/mips/include/uapi/asm/socket.h | 14 ++++++++++++-- arch/parisc/include/uapi/asm/socket.h | 14 ++++++++++++-- arch/sparc/include/uapi/asm/socket.h | 14 ++++++++++++-- include/linux/skbuff.h | 18 ++++++++++++++++++ include/net/sock.h | 1 + include/uapi/asm-generic/socket.h | 15 +++++++++++++-- net/core/sock.c | 21 +++++++++++++++++++-- net/ipv4/tcp.c | 33 +++++++++++++++++++++++++-------- net/rds/af_rds.c | 8 ++++++-- net/rds/recv.c | 16 ++++++++++++++-- net/socket.c | 35 +++++++++++++++++++++++++++-------- 12 files changed, 171 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 992a0a6dcea1..aab11eec7c22 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -3,6 +3,7 @@ #define _UAPI_ASM_SOCKET_H #include +#include /* For setsockopt(2) */ /* @@ -114,10 +115,19 @@ #define SO_TIMESTAMPNS_OLD 35 #define SO_TIMESTAMPING_OLD 37 +#define SO_TIMESTAMP_NEW 63 +#define SO_TIMESTAMPNS_NEW 64 + #if !defined(__KERNEL__) -#define SO_TIMESTAMP SO_TIMESTAMP_OLD -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#if __BITS_PER_LONG == 64 +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#endif + #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #define SCM_TIMESTAMP SO_TIMESTAMP diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 0f4516c34df2..11014f684d9f 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -11,6 +11,7 @@ #define _UAPI_ASM_SOCKET_H #include +#include /* * For setsockopt(2) @@ -125,10 +126,19 @@ #define SO_TIMESTAMPNS_OLD 35 #define SO_TIMESTAMPING_OLD 37 +#define SO_TIMESTAMP_NEW 63 +#define SO_TIMESTAMPNS_NEW 64 + #if !defined(__KERNEL__) -#define SO_TIMESTAMP SO_TIMESTAMP_OLD -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#if __BITS_PER_LONG == 64 +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#endif + #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #define SCM_TIMESTAMP SO_TIMESTAMP diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 7c180321ebd6..cbc4b89c2fe4 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -3,6 +3,7 @@ #define _UAPI_ASM_SOCKET_H #include +#include /* For setsockopt(2) */ #define SOL_SOCKET 0xffff @@ -106,10 +107,19 @@ #define SO_TIMESTAMPNS_OLD 0x4013 #define SO_TIMESTAMPING_OLD 0x4020 +#define SO_TIMESTAMP_NEW 0x4038 +#define SO_TIMESTAMPNS_NEW 0x4039 + #if !defined(__KERNEL__) -#define SO_TIMESTAMP SO_TIMESTAMP_OLD -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#if __BITS_PER_LONG == 64 +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#endif + #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #define SCM_TIMESTAMP SO_TIMESTAMP diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index d8a1bbc3e6c4..85127425b294 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -3,6 +3,7 @@ #define _ASM_SOCKET_H #include +#include /* For setsockopt(2) */ #define SOL_SOCKET 0xffff @@ -107,10 +108,19 @@ #define SO_TIMESTAMPNS_OLD 0x0021 #define SO_TIMESTAMPING_OLD 0x0023 +#define SO_TIMESTAMP_NEW 0x0041 +#define SO_TIMESTAMPNS_NEW 0x0042 + #if !defined(__KERNEL__) -#define SO_TIMESTAMP SO_TIMESTAMP_OLD -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#if __BITS_PER_LONG == 64 +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#endif + #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #define SCM_TIMESTAMP SO_TIMESTAMP diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4001611a4c9f..831846617d07 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3498,12 +3498,30 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, *stamp = ns_to_kernel_old_timeval(skb->tstamp); } +static inline void skb_get_new_timestamp(const struct sk_buff *skb, + struct __kernel_sock_timeval *stamp) +{ + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_usec = ts.tv_nsec / 1000; +} + static inline void skb_get_timestampns(const struct sk_buff *skb, struct timespec *stamp) { *stamp = ktime_to_timespec(skb->tstamp); } +static inline void skb_get_new_timestampns(const struct sk_buff *skb, + struct __kernel_timespec *stamp) +{ + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_nsec = ts.tv_nsec; +} + static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); diff --git a/include/net/sock.h b/include/net/sock.h index 2b229f7be8eb..6679f3c120b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -805,6 +805,7 @@ enum sock_flags { SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ + SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 4ef3aed31fb7..f22d3f7162f8 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -3,6 +3,7 @@ #define __ASM_GENERIC_SOCKET_H #include +#include /* For setsockopt(2) */ #define SOL_SOCKET 1 @@ -109,10 +110,20 @@ #define SO_TIMESTAMPNS_OLD 35 #define SO_TIMESTAMPING_OLD 37 +#define SO_TIMESTAMP_NEW 63 +#define SO_TIMESTAMPNS_NEW 64 + #if !defined(__KERNEL__) -#define SO_TIMESTAMP SO_TIMESTAMP_OLD -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) +/* on 64-bit and x32, avoid the ?: operator */ +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#endif + #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #define SCM_TIMESTAMP SO_TIMESTAMP diff --git a/net/core/sock.c b/net/core/sock.c index d5ca8641968f..14b987eab10c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -868,9 +868,16 @@ set_rcvbuf: break; case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: if (valbool) { - if (optname == SO_TIMESTAMP_OLD) + if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW) + sock_set_flag(sk, SOCK_TSTAMP_NEW); + else + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + + if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW) sock_reset_flag(sk, SOCK_RCVTSTAMPNS); else sock_set_flag(sk, SOCK_RCVTSTAMPNS); @@ -879,6 +886,7 @@ set_rcvbuf: } else { sock_reset_flag(sk, SOCK_RCVTSTAMP); sock_reset_flag(sk, SOCK_RCVTSTAMPNS); + sock_reset_flag(sk, SOCK_TSTAMP_NEW); } break; @@ -1245,11 +1253,20 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_TIMESTAMP_OLD: v.val = sock_flag(sk, SOCK_RCVTSTAMP) && + !sock_flag(sk, SOCK_TSTAMP_NEW) && !sock_flag(sk, SOCK_RCVTSTAMPNS); break; case SO_TIMESTAMPNS_OLD: - v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); + break; + + case SO_TIMESTAMP_NEW: + v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); + break; + + case SO_TIMESTAMPNS_NEW: + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMPING_OLD: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3ce41b04c0f0..4e9388bf104a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1861,20 +1861,37 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb, static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping *tss) { - struct __kernel_old_timeval tv; + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); bool has_timestamping = false; if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { if (sock_flag(sk, SOCK_RCVTSTAMP)) { if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - sizeof(tss->ts[0]), &tss->ts[0]); + if (new_tstamp) { + struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec}; + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(kts), &kts); + } else { + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(tss->ts[0]), &tss->ts[0]); + } } else { - tv.tv_sec = tss->ts[0].tv_sec; - tv.tv_usec = tss->ts[0].tv_nsec / 1000; - - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, - sizeof(tv), &tv); + if (new_tstamp) { + struct __kernel_sock_timeval stv; + + stv.tv_sec = tss->ts[0].tv_sec; + stv.tv_usec = tss->ts[0].tv_nsec / 1000; + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(stv), &stv); + } else { + struct __kernel_old_timeval tv; + + tv.tv_sec = tss->ts[0].tv_sec; + tv.tv_usec = tss->ts[0].tv_nsec / 1000; + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } } diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index eeb4639adbe5..65571a6273c3 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -348,7 +348,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, } static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, - int optlen) + int optlen, int optname) { int val, valbool; @@ -360,6 +360,9 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, valbool = val ? 1 : 0; + if (optname == SO_TIMESTAMP_NEW) + sock_set_flag(sk, SOCK_TSTAMP_NEW); + if (valbool) sock_set_flag(sk, SOCK_RCVTSTAMP); else @@ -431,8 +434,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, release_sock(sock->sk); break; case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: lock_sock(sock->sk); - ret = rds_enable_recvtstamp(sock->sk, optval, optlen); + ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname); release_sock(sock->sk); break; case SO_RDS_MSG_RXPATH_LATENCY: diff --git a/net/rds/recv.c b/net/rds/recv.c index 435bf2320cd3..6bb6b16ca270 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -550,8 +550,20 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, if ((inc->i_rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp); - ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, - sizeof(tv), &tv); + + if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { + ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } else { + struct __kernel_sock_timeval sk_tv; + + sk_tv.tv_sec = tv.tv_sec; + sk_tv.tv_usec = tv.tv_usec; + + ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(sk_tv), &sk_tv); + } + if (ret) goto out; } diff --git a/net/socket.c b/net/socket.c index 9cc281cdb9d9..1de96abd78d3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -705,6 +705,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); struct scm_timestamping tss; int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = @@ -719,15 +720,33 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct __kernel_old_timeval tv; - skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, - sizeof(tv), &tv); + if (new_tstamp) { + struct __kernel_sock_timeval tv; + + skb_get_new_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(tv), &tv); + } else { + struct __kernel_old_timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - sizeof(ts), &ts); + if (new_tstamp) { + struct __kernel_timespec ts; + + skb_get_new_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(ts), &ts); + } else { + struct timespec ts; + + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(ts), &ts); + } } } -- cgit v1.2.3 From 9718475e69084de15c3930ce35672a7dc6da866b Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:51 -0800 Subject: socket: Add SO_TIMESTAMPING_NEW Add SO_TIMESTAMPING_NEW variant of socket timestamp options. This is the y2038 safe versions of the SO_TIMESTAMPING_OLD for all architectures. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: chris@zankel.net Cc: fenghua.yu@intel.com Cc: rth@twiddle.net Cc: tglx@linutronix.de Cc: ubraun@linux.ibm.com Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-s390@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 5 +++-- arch/mips/include/uapi/asm/socket.h | 5 +++-- arch/parisc/include/uapi/asm/socket.h | 5 +++-- arch/sparc/include/uapi/asm/socket.h | 5 +++-- include/linux/socket.h | 8 ++++++++ include/uapi/asm-generic/socket.h | 5 +++-- include/uapi/linux/errqueue.h | 4 ++++ net/core/scm.c | 27 +++++++++++++++++++++++++++ net/core/sock.c | 8 +++++++- net/ipv4/tcp.c | 30 +++++++++++++++++------------- net/smc/af_smc.c | 3 ++- net/socket.c | 13 ++++++++----- 12 files changed, 88 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index aab11eec7c22..934ea6268f1a 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -117,19 +117,20 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 11014f684d9f..110f9506d64f 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -128,19 +128,20 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index cbc4b89c2fe4..bee2a9dde656 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -109,19 +109,20 @@ #define SO_TIMESTAMP_NEW 0x4038 #define SO_TIMESTAMPNS_NEW 0x4039 +#define SO_TIMESTAMPING_NEW 0x403A #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 85127425b294..2b38dda51426 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -110,19 +110,20 @@ #define SO_TIMESTAMP_NEW 0x0041 #define SO_TIMESTAMPNS_NEW 0x0042 +#define SO_TIMESTAMPING_NEW 0x0043 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/include/linux/socket.h b/include/linux/socket.h index ab2041a00e01..6016daeecee4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -349,9 +349,17 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +struct timespec64; struct __kernel_timespec; struct old_timespec32; +struct scm_timestamping_internal { + struct timespec64 ts[3]; +}; + +extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); +extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss); + /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff * forbid_cmsg_compat==false */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f22d3f7162f8..2713e0fa68ef 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -112,6 +112,7 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) @@ -119,13 +120,13 @@ /* on 64-bit and x32, avoid the ?: operator */ #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index c0151200f7d1..d955b9e32288 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -41,6 +41,10 @@ struct scm_timestamping { struct timespec ts[3]; }; +struct scm_timestamping64 { + struct __kernel_timespec ts[3]; +}; + /* The type of scm_timestamping, passed in sock_extended_err ee_info. * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0] * is zero, then this is a hardware timestamp and recorded in ts[2]. diff --git a/net/core/scm.c b/net/core/scm.c index b1ff8a441748..52ef219cf6df 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -252,6 +253,32 @@ out: } EXPORT_SYMBOL(put_cmsg); +void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping64 tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { + tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; + tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + } + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping64); + +void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) + tss.ts[i] = timespec64_to_timespec(tss_internal->ts[i]); + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping); + void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm diff --git a/net/core/sock.c b/net/core/sock.c index 14b987eab10c..a9d1ecce96e5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -890,6 +890,8 @@ set_rcvbuf: } break; + case SO_TIMESTAMPING_NEW: + sock_set_flag(sk, SOCK_TSTAMP_NEW); case SO_TIMESTAMPING_OLD: if (val & ~SOF_TIMESTAMPING_MASK) { ret = -EINVAL; @@ -921,9 +923,13 @@ set_rcvbuf: if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); - else + else { + if (optname == SO_TIMESTAMPING_NEW) + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); + } break; case SO_RCVLOWAT: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4e9388bf104a..cab6b2f2f61d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1844,22 +1844,22 @@ out: #endif static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { if (skb->tstamp) - tss->ts[0] = ktime_to_timespec(skb->tstamp); + tss->ts[0] = ktime_to_timespec64(skb->tstamp); else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp); + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); bool has_timestamping = false; @@ -1873,8 +1873,10 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, sizeof(kts), &kts); } else { + struct timespec ts_old = timespec64_to_timespec(tss->ts[0]); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - sizeof(tss->ts[0]), &tss->ts[0]); + sizeof(ts_old), &ts_old); } } else { if (new_tstamp) { @@ -1898,20 +1900,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) has_timestamping = true; else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; } if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) has_timestamping = true; else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } if (has_timestamping) { - tss->ts[1] = (struct timespec) {0}; - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, - sizeof(*tss), tss); + tss->ts[1] = (struct timespec64) {0}; + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, tss); + else + put_cmsg_scm_timestamping(msg, tss); } } @@ -1952,7 +1956,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, long timeo; struct sk_buff *skb, *last; u32 urg_hole = 0; - struct scm_timestamping tss; + struct scm_timestamping_internal tss; bool has_tss = false; bool has_cmsg; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..369870b0ef79 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -291,7 +291,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ diff --git a/net/socket.c b/net/socket.c index 1de96abd78d3..d51930689b98 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,7 +706,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); - struct scm_timestamping tss; + struct scm_timestamping_internal tss; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -752,20 +753,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(&tss, 0, sizeof(tss)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb); } if (!empty) { - put_cmsg(msg, SOL_SOCKET, - SO_TIMESTAMPING_OLD, sizeof(tss), &tss); + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, &tss); + else + put_cmsg_scm_timestamping(msg, &tss); if (skb_is_err_queue(skb) && skb->len && SKB_EXT_ERR(skb)->opt_stats) -- cgit v1.2.3 From 9fb20801dab46238706267896df1b3938d977129 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 1 Feb 2019 20:20:52 -0800 Subject: net: Fix ip_mc_{dec,inc}_group allocation context After 4effd28c1245 ("bridge: join all-snoopers multicast address"), I started seeing the following sleep in atomic warnings: [ 26.763893] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 26.771425] in_atomic(): 1, irqs_disabled(): 0, pid: 1658, name: sh [ 26.777855] INFO: lockdep is turned off. [ 26.781916] CPU: 0 PID: 1658 Comm: sh Not tainted 5.0.0-rc4 #20 [ 26.787943] Hardware name: BCM97278SV (DT) [ 26.792118] Call trace: [ 26.794645] dump_backtrace+0x0/0x170 [ 26.798391] show_stack+0x24/0x30 [ 26.801787] dump_stack+0xa4/0xe4 [ 26.805182] ___might_sleep+0x208/0x218 [ 26.809102] __might_sleep+0x78/0x88 [ 26.812762] kmem_cache_alloc_trace+0x64/0x28c [ 26.817301] igmp_group_dropped+0x150/0x230 [ 26.821573] ip_mc_dec_group+0x1b0/0x1f8 [ 26.825585] br_ip4_multicast_leave_snoopers.isra.11+0x174/0x190 [ 26.831704] br_multicast_toggle+0x78/0xcc [ 26.835887] store_bridge_parm+0xc4/0xfc [ 26.839894] multicast_snooping_store+0x3c/0x4c [ 26.844517] dev_attr_store+0x44/0x5c [ 26.848262] sysfs_kf_write+0x50/0x68 [ 26.852006] kernfs_fop_write+0x14c/0x1b4 [ 26.856102] __vfs_write+0x60/0x190 [ 26.859668] vfs_write+0xc8/0x168 [ 26.863059] ksys_write+0x70/0xc8 [ 26.866449] __arm64_sys_write+0x24/0x30 [ 26.870458] el0_svc_common+0xa0/0x11c [ 26.874291] el0_svc_handler+0x38/0x70 [ 26.878120] el0_svc+0x8/0xc while toggling the bridge's multicast_snooping attribute dynamically. Pass a gfp_t down to igmpv3_add_delrec(), introduce __igmp_group_dropped() and introduce __ip_mc_dec_group() to take a gfp_t argument. Similarly introduce ____ip_mc_inc_group() and __ip_mc_inc_group() to allow caller to specify gfp_t. IPv6 part of the patch appears fine. Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/igmp.h | 8 +++++++- net/bridge/br_multicast.c | 4 ++-- net/ipv4/igmp.c | 35 ++++++++++++++++++++++++----------- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 8b4348f69bc5..cc85f4524dbf 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -137,7 +137,13 @@ extern void ip_mc_up(struct in_device *); extern void ip_mc_down(struct in_device *); extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); -extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); +extern void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); +static inline void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) +{ + return __ip_mc_dec_group(in_dev, addr, GFP_KERNEL); +} +extern void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, + gfp_t gfp); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); int ip_mc_check_igmp(struct sk_buff *skb); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 780757b7a82f..1fb885a33c66 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1841,7 +1841,7 @@ static void br_ip4_multicast_join_snoopers(struct net_bridge *br) if (!in_dev) return; - ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + __ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } @@ -1872,7 +1872,7 @@ static void br_ip4_multicast_leave_snoopers(struct net_bridge *br) if (WARN_ON(!in_dev)) return; - ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + __ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a40e48ded10d..b448cf32296c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -159,7 +159,8 @@ static int unsolicited_report_interval(struct in_device *in_dev) return interval_jiffies; } -static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); +static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + gfp_t gfp); static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); @@ -1145,7 +1146,8 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) /* * deleted ip_mc_list manipulation */ -static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) +static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + gfp_t gfp) { struct ip_mc_list *pmc; struct net *net = dev_net(in_dev->dev); @@ -1156,7 +1158,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); + pmc = kzalloc(sizeof(*pmc), gfp); if (!pmc) return; spin_lock_init(&pmc->lock); @@ -1261,7 +1263,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) } #endif -static void igmp_group_dropped(struct ip_mc_list *im) +static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST @@ -1292,13 +1294,18 @@ static void igmp_group_dropped(struct ip_mc_list *im) return; } /* IGMPv3 */ - igmpv3_add_delrec(in_dev, im); + igmpv3_add_delrec(in_dev, im, gfp); igmp_ifc_event(in_dev); } #endif } +static void igmp_group_dropped(struct ip_mc_list *im) +{ + __igmp_group_dropped(im, GFP_KERNEL); +} + static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; @@ -1400,8 +1407,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev, /* * A socket has joined a multicast group on device dev. */ -static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, - unsigned int mode) +static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, + unsigned int mode, gfp_t gfp) { struct ip_mc_list *im; @@ -1415,7 +1422,7 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, } } - im = kzalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), gfp); if (!im) goto out; @@ -1448,6 +1455,12 @@ out: return; } +void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) +{ + ____ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE, gfp); +} +EXPORT_SYMBOL(__ip_mc_inc_group); + void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE); @@ -1634,7 +1647,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) +void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) { struct ip_mc_list *i; struct ip_mc_list __rcu **ip; @@ -1649,7 +1662,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; - igmp_group_dropped(i); + __igmp_group_dropped(i, gfp); ip_mc_clear_src(i); if (!in_dev->dead) @@ -1662,7 +1675,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) } } } -EXPORT_SYMBOL(ip_mc_dec_group); +EXPORT_SYMBOL(__ip_mc_dec_group); /* Device changing type */ -- cgit v1.2.3 From 5f3d544f1671d214cd26e45bda326f921455256e Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 1 Feb 2019 22:45:17 -0500 Subject: audit: remove audit_context when CONFIG_ AUDIT and not AUDITSYSCALL Remove audit_context from struct task_struct and struct audit_buffer when CONFIG_AUDIT is enabled but CONFIG_AUDITSYSCALL is not. Also, audit_log_name() (and supporting inode and fcaps functions) should have been put back in auditsc.c when soft and hard link logging was normalized since it is only used by syscall auditing. See github issue https://github.com/linux-audit/audit-kernel/issues/105 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/sched.h | 4 +- kernel/audit.c | 157 ------------------------------------------------- kernel/audit.h | 9 --- kernel/auditsc.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 167 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f9788bb122c5..765119df759a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -885,8 +885,10 @@ struct task_struct { struct callback_head *task_works; - struct audit_context *audit_context; #ifdef CONFIG_AUDIT +#ifdef CONFIG_AUDITSYSCALL + struct audit_context *audit_context; +#endif kuid_t loginuid; unsigned int sessionid; #endif diff --git a/kernel/audit.c b/kernel/audit.c index b7177a8def2e..c89ea48c70a6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2067,163 +2067,6 @@ void audit_log_key(struct audit_buffer *ab, char *key) audit_log_format(ab, "(null)"); } -void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) -{ - int i; - - if (cap_isclear(*cap)) { - audit_log_format(ab, " %s=0", prefix); - return; - } - audit_log_format(ab, " %s=", prefix); - CAP_FOR_EACH_U32(i) - audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); -} - -static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) -{ - if (name->fcap_ver == -1) { - audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); - return; - } - audit_log_cap(ab, "cap_fp", &name->fcap.permitted); - audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); - audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", - name->fcap.fE, name->fcap_ver, - from_kuid(&init_user_ns, name->fcap.rootid)); -} - -static inline int audit_copy_fcaps(struct audit_names *name, - const struct dentry *dentry) -{ - struct cpu_vfs_cap_data caps; - int rc; - - if (!dentry) - return 0; - - rc = get_vfs_caps_from_disk(dentry, &caps); - if (rc) - return rc; - - name->fcap.permitted = caps.permitted; - name->fcap.inheritable = caps.inheritable; - name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); - name->fcap.rootid = caps.rootid; - name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> - VFS_CAP_REVISION_SHIFT; - - return 0; -} - -/* Copy inode data into an audit_names. */ -void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode, unsigned int flags) -{ - name->ino = inode->i_ino; - name->dev = inode->i_sb->s_dev; - name->mode = inode->i_mode; - name->uid = inode->i_uid; - name->gid = inode->i_gid; - name->rdev = inode->i_rdev; - security_inode_getsecid(inode, &name->osid); - if (flags & AUDIT_INODE_NOEVAL) { - name->fcap_ver = -1; - return; - } - audit_copy_fcaps(name, dentry); -} - -/** - * audit_log_name - produce AUDIT_PATH record from struct audit_names - * @context: audit_context for the task - * @n: audit_names structure with reportable details - * @path: optional path to report instead of audit_names->name - * @record_num: record number to report when handling a list of names - * @call_panic: optional pointer to int that will be updated if secid fails - */ -void audit_log_name(struct audit_context *context, struct audit_names *n, - const struct path *path, int record_num, int *call_panic) -{ - struct audit_buffer *ab; - ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); - if (!ab) - return; - - audit_log_format(ab, "item=%d", record_num); - - if (path) - audit_log_d_path(ab, " name=", path); - else if (n->name) { - switch (n->name_len) { - case AUDIT_NAME_FULL: - /* log the full path */ - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, n->name->name); - break; - case 0: - /* name was specified as a relative path and the - * directory component is the cwd */ - audit_log_d_path(ab, " name=", &context->pwd); - break; - default: - /* log the name's directory component */ - audit_log_format(ab, " name="); - audit_log_n_untrustedstring(ab, n->name->name, - n->name_len); - } - } else - audit_log_format(ab, " name=(null)"); - - if (n->ino != AUDIT_INO_UNSET) - audit_log_format(ab, " inode=%lu" - " dev=%02x:%02x mode=%#ho" - " ouid=%u ogid=%u rdev=%02x:%02x", - n->ino, - MAJOR(n->dev), - MINOR(n->dev), - n->mode, - from_kuid(&init_user_ns, n->uid), - from_kgid(&init_user_ns, n->gid), - MAJOR(n->rdev), - MINOR(n->rdev)); - if (n->osid != 0) { - char *ctx = NULL; - u32 len; - if (security_secid_to_secctx( - n->osid, &ctx, &len)) { - audit_log_format(ab, " osid=%u", n->osid); - if (call_panic) - *call_panic = 2; - } else { - audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); - } - } - - /* log the audit_names record type */ - switch(n->type) { - case AUDIT_TYPE_NORMAL: - audit_log_format(ab, " nametype=NORMAL"); - break; - case AUDIT_TYPE_PARENT: - audit_log_format(ab, " nametype=PARENT"); - break; - case AUDIT_TYPE_CHILD_DELETE: - audit_log_format(ab, " nametype=DELETE"); - break; - case AUDIT_TYPE_CHILD_CREATE: - audit_log_format(ab, " nametype=CREATE"); - break; - default: - audit_log_format(ab, " nametype=UNKNOWN"); - break; - } - - audit_log_fcaps(ab, n); - audit_log_end(ab); -} - int audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; diff --git a/kernel/audit.h b/kernel/audit.h index 002f0f7ba732..82734f438ddd 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -213,15 +213,6 @@ extern bool audit_ever_enabled; extern void audit_log_session_info(struct audit_buffer *ab); -extern void audit_copy_inode(struct audit_names *name, - const struct dentry *dentry, - struct inode *inode, unsigned int flags); -extern void audit_log_cap(struct audit_buffer *ab, char *prefix, - kernel_cap_t *cap); -extern void audit_log_name(struct audit_context *context, - struct audit_names *n, const struct path *path, - int record_num, int *call_panic); - extern int auditd_test_task(struct task_struct *task); #define AUDIT_INODE_BUCKETS 32 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7d37cb1e4aef..d1eab1d4a930 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1139,6 +1139,32 @@ out: kfree(buf_head); } +void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) +{ + int i; + + if (cap_isclear(*cap)) { + audit_log_format(ab, " %s=0", prefix); + return; + } + audit_log_format(ab, " %s=", prefix); + CAP_FOR_EACH_U32(i) + audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); +} + +static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) +{ + if (name->fcap_ver == -1) { + audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?"); + return; + } + audit_log_cap(ab, "cap_fp", &name->fcap.permitted); + audit_log_cap(ab, "cap_fi", &name->fcap.inheritable); + audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d", + name->fcap.fE, name->fcap_ver, + from_kuid(&init_user_ns, name->fcap.rootid)); +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1261,6 +1287,97 @@ static inline int audit_proctitle_rtrim(char *proctitle, int len) return len; } +/* + * audit_log_name - produce AUDIT_PATH record from struct audit_names + * @context: audit_context for the task + * @n: audit_names structure with reportable details + * @path: optional path to report instead of audit_names->name + * @record_num: record number to report when handling a list of names + * @call_panic: optional pointer to int that will be updated if secid fails + */ +static void audit_log_name(struct audit_context *context, struct audit_names *n, + const struct path *path, int record_num, int *call_panic) +{ + struct audit_buffer *ab; + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); + if (!ab) + return; + + audit_log_format(ab, "item=%d", record_num); + + if (path) + audit_log_d_path(ab, " name=", path); + else if (n->name) { + switch (n->name_len) { + case AUDIT_NAME_FULL: + /* log the full path */ + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, n->name->name); + break; + case 0: + /* name was specified as a relative path and the + * directory component is the cwd + */ + audit_log_d_path(ab, " name=", &context->pwd); + break; + default: + /* log the name's directory component */ + audit_log_format(ab, " name="); + audit_log_n_untrustedstring(ab, n->name->name, + n->name_len); + } + } else + audit_log_format(ab, " name=(null)"); + + if (n->ino != AUDIT_INO_UNSET) + audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x", + n->ino, + MAJOR(n->dev), + MINOR(n->dev), + n->mode, + from_kuid(&init_user_ns, n->uid), + from_kgid(&init_user_ns, n->gid), + MAJOR(n->rdev), + MINOR(n->rdev)); + if (n->osid != 0) { + char *ctx = NULL; + u32 len; + + if (security_secid_to_secctx( + n->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", n->osid); + if (call_panic) + *call_panic = 2; + } else { + audit_log_format(ab, " obj=%s", ctx); + security_release_secctx(ctx, len); + } + } + + /* log the audit_names record type */ + switch (n->type) { + case AUDIT_TYPE_NORMAL: + audit_log_format(ab, " nametype=NORMAL"); + break; + case AUDIT_TYPE_PARENT: + audit_log_format(ab, " nametype=PARENT"); + break; + case AUDIT_TYPE_CHILD_DELETE: + audit_log_format(ab, " nametype=DELETE"); + break; + case AUDIT_TYPE_CHILD_CREATE: + audit_log_format(ab, " nametype=CREATE"); + break; + default: + audit_log_format(ab, " nametype=UNKNOWN"); + break; + } + + audit_log_fcaps(ab, n); + audit_log_end(ab); +} + static void audit_log_proctitle(void) { int res; @@ -1756,6 +1873,47 @@ void __audit_getname(struct filename *name) get_fs_pwd(current->fs, &context->pwd); } +static inline int audit_copy_fcaps(struct audit_names *name, + const struct dentry *dentry) +{ + struct cpu_vfs_cap_data caps; + int rc; + + if (!dentry) + return 0; + + rc = get_vfs_caps_from_disk(dentry, &caps); + if (rc) + return rc; + + name->fcap.permitted = caps.permitted; + name->fcap.inheritable = caps.inheritable; + name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap.rootid = caps.rootid; + name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> + VFS_CAP_REVISION_SHIFT; + + return 0; +} + +/* Copy inode data into an audit_names. */ +void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, + struct inode *inode, unsigned int flags) +{ + name->ino = inode->i_ino; + name->dev = inode->i_sb->s_dev; + name->mode = inode->i_mode; + name->uid = inode->i_uid; + name->gid = inode->i_gid; + name->rdev = inode->i_rdev; + security_inode_getsecid(inode, &name->osid); + if (flags & AUDIT_INODE_NOEVAL) { + name->fcap_ver = -1; + return; + } + audit_copy_fcaps(name, dentry); +} + /** * __audit_inode - store the inode and device from a lookup * @name: name being audited -- cgit v1.2.3 From ce3fdb697f684b0c018cda8af91f953b7936a9c2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 2 Feb 2019 19:47:25 -0800 Subject: netdevice.h: Add __cold to netdev_ logging functions Add __cold to the netdev_ logging functions similar to the use of __cold in the generic printk function. Using __cold moves all the netdev_ logging functions out-of-line possibly improving code locality and runtime performance. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e675ef97a426..ba57d0ba425e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4663,22 +4663,22 @@ static inline const char *netdev_reg_state(const struct net_device *dev) return " (unknown)"; } -__printf(3, 4) +__printf(3, 4) __cold void netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_emerg(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_alert(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_crit(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_err(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_warn(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_notice(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_info(const struct net_device *dev, const char *format, ...); #define netdev_level_once(level, dev, fmt, ...) \ -- cgit v1.2.3 From 494c704f9af0a0cddf593b381ea44320888733e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 2 Feb 2019 10:41:13 +0100 Subject: efi: Use 32-bit alignment for efi_guid_t The UEFI spec and EDK2 reference implementation both define EFI_GUID as struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM), this means that firmware services invoked by the kernel may assume that efi_guid_t* arguments are 32-bit aligned, and use memory accessors that do not tolerate misalignment. So let's set the minimum alignment to 32 bits. Note that the UEFI spec as well as some comments in the EDK2 code base suggest that EFI_GUID should be 64-bit aligned, but this appears to be a mistake, given that no code seems to exist that actually enforces that or relies on it. Reported-by: Heinrich Schuchardt Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm Cc: AKASHI Takahiro Cc: Alexander Graf Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Jeffrey Hugo Cc: Lee Jones Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190202094119.13230-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..be08518c2553 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,7 +48,20 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; -typedef guid_t efi_guid_t; +/* + * The UEFI spec and EDK2 reference implementation both define EFI_GUID as + * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment + * is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM), + * this means that firmware services invoked by the kernel may assume that + * efi_guid_t* arguments are 32-bit aligned, and use memory accessors that + * do not tolerate misalignment. So let's set the minimum alignment to 32 bits. + * + * Note that the UEFI spec as well as some comments in the EDK2 code base + * suggest that EFI_GUID should be 64-bit aligned, but this appears to be + * a mistake, given that no code seems to exist that actually enforces that + * or relies on it. + */ +typedef guid_t efi_guid_t __aligned(__alignof__(u32)); #define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) -- cgit v1.2.3 From 8c94abbbe1ba24961278055434504b7dc3595415 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 28 Jan 2019 14:27:26 +0200 Subject: perf: Convert perf_event_context.refcount to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable perf_event_context.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the perf_event_context.refcount it might make a difference in following places: - get_ctx(), perf_event_ctx_lock_nested(), perf_lock_task_context() and __perf_event_ctx_lock_double(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart - put_ctx(): decrement in refcount_dec_and_test() provides RELEASE ordering and ACQUIRE ordering + control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: namhyung@kernel.org Link: https://lkml.kernel.org/r/1548678448-24458-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- kernel/events/core.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a79e59fc3b7d..6cb5d483ab34 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -737,7 +738,7 @@ struct perf_event_context { int nr_stat; int nr_freq; int rotate_disable; - atomic_t refcount; + refcount_t refcount; struct task_struct *task; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 5b89de7918d0..677164d54547 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1172,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx) static void get_ctx(struct perf_event_context *ctx) { - WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); + refcount_inc(&ctx->refcount); } static void free_ctx(struct rcu_head *head) @@ -1186,7 +1186,7 @@ static void free_ctx(struct rcu_head *head) static void put_ctx(struct perf_event_context *ctx) { - if (atomic_dec_and_test(&ctx->refcount)) { + if (refcount_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) put_ctx(ctx->parent_ctx); if (ctx->task && ctx->task != TASK_TOMBSTONE) @@ -1268,7 +1268,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting) again: rcu_read_lock(); ctx = READ_ONCE(event->ctx); - if (!atomic_inc_not_zero(&ctx->refcount)) { + if (!refcount_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; } @@ -1401,7 +1401,7 @@ retry: } if (ctx->task == TASK_TOMBSTONE || - !atomic_inc_not_zero(&ctx->refcount)) { + !refcount_inc_not_zero(&ctx->refcount)) { raw_spin_unlock(&ctx->lock); ctx = NULL; } else { @@ -4057,7 +4057,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->event_list); INIT_LIST_HEAD(&ctx->pinned_active); INIT_LIST_HEAD(&ctx->flexible_active); - atomic_set(&ctx->refcount, 1); + refcount_set(&ctx->refcount, 1); } static struct perf_event_context * @@ -10613,7 +10613,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader, again: rcu_read_lock(); gctx = READ_ONCE(group_leader->ctx); - if (!atomic_inc_not_zero(&gctx->refcount)) { + if (!refcount_inc_not_zero(&gctx->refcount)) { rcu_read_unlock(); goto again; } -- cgit v1.2.3 From d036bda7d0e7269c2982eb979acfef855f5d7977 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:26 +0200 Subject: sched/core: Convert sighand_struct.count to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable sighand_struct.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the sighand_struct.count it might make a difference in following places: - __cleanup_sighand: decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- fs/exec.c | 4 ++-- fs/proc/task_nommu.c | 2 +- include/linux/sched/signal.h | 3 ++- kernel/fork.c | 8 ++++---- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index fb72d36f7823..966cd98a2ce2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1189,7 +1189,7 @@ no_thread_group: flush_itimer_signals(); #endif - if (atomic_read(&oldsighand->count) != 1) { + if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; /* * This ->sighand is shared with the CLONE_SIGHAND @@ -1199,7 +1199,7 @@ no_thread_group: if (!newsighand) return -ENOMEM; - atomic_set(&newsighand->count, 1); + refcount_set(&newsighand->count, 1); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 0b63d68dedb2..f912872fbf91 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -64,7 +64,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) else bytes += kobjsize(current->files); - if (current->sighand && atomic_read(¤t->sighand->count) > 1) + if (current->sighand && refcount_read(¤t->sighand->count) > 1) sbytes += kobjsize(current->sighand); else bytes += kobjsize(current->sighand); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 13789d10a50e..37eeb1a28eba 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,13 +8,14 @@ #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: */ struct sighand_struct { - atomic_t count; + refcount_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; wait_queue_head_t signalfd_wqh; diff --git a/kernel/fork.c b/kernel/fork.c index b69248e6f0e0..370856d4c0b3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1463,7 +1463,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) struct sighand_struct *sig; if (clone_flags & CLONE_SIGHAND) { - atomic_inc(¤t->sighand->count); + refcount_inc(¤t->sighand->count); return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); @@ -1471,7 +1471,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) if (!sig) return -ENOMEM; - atomic_set(&sig->count, 1); + refcount_set(&sig->count, 1); spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); spin_unlock_irq(¤t->sighand->siglock); @@ -1480,7 +1480,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_sighand(struct sighand_struct *sighand) { - if (atomic_dec_and_test(&sighand->count)) { + if (refcount_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it @@ -2439,7 +2439,7 @@ static int check_unshare_flags(unsigned long unshare_flags) return -EINVAL; } if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { - if (atomic_read(¤t->sighand->count) > 1) + if (refcount_read(¤t->sighand->count) > 1) return -EINVAL; } if (unshare_flags & CLONE_VM) { -- cgit v1.2.3 From 60d4de3ff7f775509deba94b3db3c1abe55bf7a5 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:27 +0200 Subject: sched/core: Convert signal_struct.sigcnt to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable signal_struct.sigcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the signal_struct.sigcnt it might make a difference in following places: - put_signal_struct(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-3-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched/signal.h | 2 +- init/init_task.c | 2 +- kernel/fork.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 37eeb1a28eba..ae5655197698 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -83,7 +83,7 @@ struct multiprocess_signals { * the locking of signal_struct. */ struct signal_struct { - atomic_t sigcnt; + refcount_t sigcnt; atomic_t live; int nr_threads; struct list_head thread_head; diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..9aa3ebc74970 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -44,7 +44,7 @@ static struct signal_struct init_signals = { }; static struct sighand_struct init_sighand = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .action = { { { .sa_handler = SIG_DFL, } }, }, .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock), .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), diff --git a/kernel/fork.c b/kernel/fork.c index 370856d4c0b3..935a42d5f8ff 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -710,7 +710,7 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) + if (refcount_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } @@ -1527,7 +1527,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); - atomic_set(&sig->sigcnt, 1); + refcount_set(&sig->sigcnt, 1); /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); @@ -2082,7 +2082,7 @@ static __latent_entropy struct task_struct *copy_process( } else { current->signal->nr_threads++; atomic_inc(¤t->signal->live); - atomic_inc(¤t->signal->sigcnt); + refcount_inc(¤t->signal->sigcnt); task_join_group_stop(p); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); -- cgit v1.2.3 From ec1d281923cf81cc660343d0cb8ffc837ffb991d Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:29 +0200 Subject: sched/core: Convert task_struct.usage to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable task_struct.usage is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the task_struct.usage it might make a difference in following places: - put_task_struct(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-5-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- include/linux/sched/task.h | 4 ++-- init/init_task.c | 2 +- kernel/fork.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e2bba022827d..9d14d6864ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -607,7 +608,7 @@ struct task_struct { randomized_struct_fields_start void *stack; - atomic_t usage; + refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; unsigned int ptrace; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 44c6f15800ff..2e97a2227045 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,13 +88,13 @@ extern void sched_exec(void); #define sched_exec() {} #endif -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) +#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0) extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) { - if (atomic_dec_and_test(&t->usage)) + if (refcount_dec_and_test(&t->usage)) __put_task_struct(t); } diff --git a/init/init_task.c b/init/init_task.c index 9aa3ebc74970..aca34c89529f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -65,7 +65,7 @@ struct task_struct init_task #endif .state = 0, .stack = init_stack, - .usage = ATOMIC_INIT(2), + .usage = REFCOUNT_INIT(2), .flags = PF_KTHREAD, .prio = MAX_PRIO - 20, .static_prio = MAX_PRIO - 20, diff --git a/kernel/fork.c b/kernel/fork.c index 935a42d5f8ff..3f7e192e29f2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -717,7 +717,7 @@ static inline void put_signal_struct(struct signal_struct *sig) void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); - WARN_ON(atomic_read(&tsk->usage)); + WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); cgroup_free(tsk); @@ -896,7 +896,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * One for us, one for whoever does the "release_task()" (usually * parent) */ - atomic_set(&tsk->usage, 2); + refcount_set(&tsk->usage, 2); #ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; #endif -- cgit v1.2.3 From f0b89d3958d73cd0785ec381f0ddf8efb6f183d8 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 18 Jan 2019 14:27:30 +0200 Subject: sched/core: Convert task_struct.stack_refcount to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable task_struct.stack_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. ** Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the task_struct.stack_refcount it might make a difference in following places: - try_get_task_stack(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart - put_task_stack(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Andrea Parri Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1547814450-18902-6-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + include/linux/sched.h | 2 +- include/linux/sched/task_stack.h | 2 +- init/init_task.c | 2 +- kernel/fork.c | 6 +++--- 5 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a7083a45a26c..6049baa5b8bc 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 9d14d6864ca6..628bf13cb5a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1194,7 +1194,7 @@ struct task_struct { #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ - atomic_t stack_refcount; + refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 6a841929073f..2413427e439c 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -61,7 +61,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #ifdef CONFIG_THREAD_INFO_IN_TASK static inline void *try_get_task_stack(struct task_struct *tsk) { - return atomic_inc_not_zero(&tsk->stack_refcount) ? + return refcount_inc_not_zero(&tsk->stack_refcount) ? task_stack_page(tsk) : NULL; } diff --git a/init/init_task.c b/init/init_task.c index aca34c89529f..46dbf546264d 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -61,7 +61,7 @@ struct task_struct init_task = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = ATOMIC_INIT(1), + .stack_refcount = REFCOUNT_INIT(1), #endif .state = 0, .stack = init_stack, diff --git a/kernel/fork.c b/kernel/fork.c index 3f7e192e29f2..77059b211608 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -429,7 +429,7 @@ static void release_task_stack(struct task_struct *tsk) #ifdef CONFIG_THREAD_INFO_IN_TASK void put_task_stack(struct task_struct *tsk) { - if (atomic_dec_and_test(&tsk->stack_refcount)) + if (refcount_dec_and_test(&tsk->stack_refcount)) release_task_stack(tsk); } #endif @@ -447,7 +447,7 @@ void free_task(struct task_struct *tsk) * If the task had a separate stack allocation, it should be gone * by now. */ - WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); + WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); #endif rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -867,7 +867,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->stack_vm_area = stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - atomic_set(&tsk->stack_refcount, 1); + refcount_set(&tsk->stack_refcount, 1); #endif if (err) -- cgit v1.2.3 From 07879c6a3740fbbf3c8891a0ab484c20a12794d8 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 18 Dec 2018 11:53:52 -0800 Subject: sched/wake_q: Reduce reference counting for special users Some users, specifically futexes and rwsems, required fixes that allowed the callers to be safe when wakeups occur before they are expected by wake_up_q(). Such scenarios also play games and rely on reference counting, and until now were pivoting on wake_q doing it. With the wake_q_add() call being moved down, this can no longer be the case. As such we end up with a a double task refcounting overhead; and these callers care enough about this (being rather core-ish). This patch introduces a wake_q_add_safe() call that serves for callers that have already done refcounting and therefore the task is 'safe' from wake_q point of view (int that it requires reference throughout the entire queue/>wakeup cycle). In the one case it has internal reference counting, in the other case it consumes the reference counting. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: Xie Yongji Cc: Yongji Xie Cc: andrea.parri@amarulasolutions.com Cc: lilin24@baidu.com Cc: liuqi16@baidu.com Cc: nixun@baidu.com Cc: yuanlinsi01@baidu.com Cc: zhangyu31@baidu.com Link: https://lkml.kernel.org/r/20181218195352.7orq3upiwfdbrdne@linux-r8p5 Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 4 +-- kernel/futex.c | 3 +-- kernel/locking/rwsem-xadd.c | 4 +-- kernel/sched/core.c | 60 ++++++++++++++++++++++++++++++++------------ 4 files changed, 48 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 545f37138057..ad826d2a4557 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -51,8 +51,8 @@ static inline void wake_q_init(struct wake_q_head *head) head->lastp = &head->first; } -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); +extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); +extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/kernel/futex.c b/kernel/futex.c index 69e619baf709..2abe1a0b3062 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1463,8 +1463,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) * Queue the task for later wakeup for after we've released * the hb->lock. wake_q_add() grabs reference to p. */ - wake_q_add(wake_q, p); - put_task_struct(p); + wake_q_add_safe(wake_q, p); } /* diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 50d9af615dc4..fbe96341beee 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * Ensure issuing the wakeup (either by us or someone else) * after setting the reader waiter to nil. */ - wake_q_add(wake_q, tsk); - /* wake_q_add() already take the task ref */ - put_task_struct(tsk); + wake_q_add_safe(wake_q, tsk); } adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3c8b4dba3d2d..64ceaa5158c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -396,19 +396,7 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif -/** - * wake_q_add() - queue a wakeup for 'later' waking. - * @head: the wake_q_head to add @task to - * @task: the task to queue for 'later' wakeup - * - * Queue a task for later wakeup, most likely by the wake_up_q() call in the - * same context, _HOWEVER_ this is not guaranteed, the wakeup can come - * instantly. - * - * This function must be used as-if it were wake_up_process(); IOW the task - * must be ready to be woken at this location. - */ -void wake_q_add(struct wake_q_head *head, struct task_struct *task) +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) { struct wake_q_node *node = &task->wake_q; @@ -422,15 +410,55 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) */ smp_mb__before_atomic(); if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) - return; - - get_task_struct(task); + return false; /* * The head is context local, there can be no concurrency. */ *head->lastp = node; head->lastp = &node->next; + return true; +} + +/** + * wake_q_add() - queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + */ +void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + if (__wake_q_add(head, task)) + get_task_struct(task); +} + +/** + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + * + * This function is essentially a task-safe equivalent to wake_q_add(). Callers + * that already hold reference to @task can call the 'safe' version and trust + * wake_q to do the right thing depending whether or not the @task is already + * queued for wakeup. + */ +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) +{ + if (!__wake_q_add(head, task)) + put_task_struct(task); } void wake_up_q(struct wake_q_head *head) -- cgit v1.2.3 From 23127296889fe84b0762b191b5d041e8ba6f2599 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 23 Jan 2019 16:26:53 +0100 Subject: sched/fair: Update scale invariance of PELT The current implementation of load tracking invariance scales the contribution with current frequency and uarch performance (only for utilization) of the CPU. One main result of this formula is that the figures are capped by current capacity of CPU. Another one is that the load_avg is not invariant because not scaled with uarch. The util_avg of a periodic task that runs r time slots every p time slots varies in the range : U * (1-y^r)/(1-y^p) * y^i < Utilization < U * (1-y^r)/(1-y^p) with U is the max util_avg value = SCHED_CAPACITY_SCALE At a lower capacity, the range becomes: U * C * (1-y^r')/(1-y^p) * y^i' < Utilization < U * C * (1-y^r')/(1-y^p) with C reflecting the compute capacity ratio between current capacity and max capacity. so C tries to compensate changes in (1-y^r') but it can't be accurate. Instead of scaling the contribution value of PELT algo, we should scale the running time. The PELT signal aims to track the amount of computation of tasks and/or rq so it seems more correct to scale the running time to reflect the effective amount of computation done since the last update. In order to be fully invariant, we need to apply the same amount of running time and idle time whatever the current capacity. Because running at lower capacity implies that the task will run longer, we have to ensure that the same amount of idle time will be applied when system becomes idle and no idle time has been "stolen". But reaching the maximum utilization value (SCHED_CAPACITY_SCALE) means that the task is seen as an always-running task whatever the capacity of the CPU (even at max compute capacity). In this case, we can discard this "stolen" idle times which becomes meaningless. In order to achieve this time scaling, a new clock_pelt is created per rq. The increase of this clock scales with current capacity when something is running on rq and synchronizes with clock_task when rq is idle. With this mechanism, we ensure the same running and idle time whatever the current capacity. This also enables to simplify the pelt algorithm by removing all references of uarch and frequency and applying the same contribution to utilization and loads. Furthermore, the scaling is done only once per update of clock (update_rq_clock_task()) instead of during each update of sched_entities and cfs/rt/dl_rq of the rq like the current implementation. This is interesting when cgroup are involved as shown in the results below: On a hikey (octo Arm64 platform). Performance cpufreq governor and only shallowest c-state to remove variance generated by those power features so we only track the impact of pelt algo. each test runs 16 times: ./perf bench sched pipe (higher is better) kernel tip/sched/core + patch ops/seconds ops/seconds diff cgroup root 59652(+/- 0.18%) 59876(+/- 0.24%) +0.38% level1 55608(+/- 0.27%) 55923(+/- 0.24%) +0.57% level2 52115(+/- 0.29%) 52564(+/- 0.22%) +0.86% hackbench -l 1000 (lower is better) kernel tip/sched/core + patch duration(sec) duration(sec) diff cgroup root 4.453(+/- 2.37%) 4.383(+/- 2.88%) -1.57% level1 4.859(+/- 8.50%) 4.830(+/- 7.07%) -0.60% level2 5.063(+/- 9.83%) 4.928(+/- 9.66%) -2.66% Then, the responsiveness of PELT is improved when CPU is not running at max capacity with this new algorithm. I have put below some examples of duration to reach some typical load values according to the capacity of the CPU with current implementation and with this patch. These values has been computed based on the geometric series and the half period value: Util (%) max capacity half capacity(mainline) half capacity(w/ patch) 972 (95%) 138ms not reachable 276ms 486 (47.5%) 30ms 138ms 60ms 256 (25%) 13ms 32ms 26ms On my hikey (octo Arm64 platform) with schedutil governor, the time to reach max OPP when starting from a null utilization, decreases from 223ms with current scale invariance down to 121ms with the new algorithm. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten.Rasmussen@arm.com Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: patrick.bellasi@arm.com Cc: pjt@google.com Cc: pkondeti@codeaurora.org Cc: quentin.perret@arm.com Cc: rjw@rjwysocki.net Cc: srinivas.pandruvada@linux.intel.com Cc: thara.gopinath@linaro.org Link: https://lkml.kernel.org/r/1548257214-13745-3-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++------- kernel/sched/core.c | 1 + kernel/sched/deadline.c | 6 +-- kernel/sched/fair.c | 45 ++++++++++--------- kernel/sched/pelt.c | 45 ++++++++++--------- kernel/sched/pelt.h | 114 ++++++++++++++++++++++++++++++++++++++++++++++-- kernel/sched/rt.c | 6 +-- kernel/sched/sched.h | 5 ++- 8 files changed, 177 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 628bf13cb5a5..351c0fe64c85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -357,12 +357,6 @@ struct util_est { * For cfs_rq, it is the aggregated load_avg of all runnable and * blocked sched_entities. * - * load_avg may also take frequency scaling into account: - * - * load_avg = runnable% * scale_load_down(load) * freq% - * - * where freq% is the CPU frequency normalized to the highest frequency. - * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE @@ -371,17 +365,14 @@ struct util_est { * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable * and blocked sched_entities. * - * util_avg may also factor frequency scaling and CPU capacity scaling: - * - * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% - * - * where freq% is the same as above, and capacity% is the CPU capacity - * normalized to the greatest capacity (due to uarch differences, etc). + * load_avg and util_avg don't direcly factor frequency scaling and CPU + * capacity scaling. The scaling is done through the rq_clock_pelt that + * is used for computing those signals (see update_rq_clock_pelt()) * - * N.B., the above ratios (runnable%, running%, freq%, and capacity%) - * themselves are in the range of [0, 1]. To do fixed point arithmetics, - * we therefore scale them to as large a range as necessary. This is for - * example reflected by util_avg's SCHED_CAPACITY_SCALE. + * N.B., the above ratios (runnable% and running%) themselves are in the + * range of [0, 1]. To do fixed point arithmetics, we therefore scale them + * to as large a range as necessary. This is for example reflected by + * util_avg's SCHED_CAPACITY_SCALE. * * [Overflow issue] * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..32e06704565e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -180,6 +180,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) update_irq_load_avg(rq, irq_delta + steal); #endif + update_rq_clock_pelt(rq, delta); } void update_rq_clock(struct rq *rq) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb8b7b5d745d..6a73e41a2016 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1767,7 +1767,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) deadline_queue_push_tasks(rq); if (rq->curr->sched_class != &dl_sched_class) - update_dl_rq_load_avg(rq_clock_task(rq), rq, 0); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); return p; } @@ -1776,7 +1776,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 1); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1793,7 +1793,7 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) { update_curr_dl(rq); - update_dl_rq_load_avg(rq_clock_task(rq), rq, 1); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); /* * Even when we have runtime, update_curr_dl() might have resulted in us * not being the leftmost task anymore. In that case NEED_RESCHED will diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index da13e834e990..f41f2eec6186 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -673,9 +673,8 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) return calc_delta_fair(sched_slice(cfs_rq, se), se); } -#ifdef CONFIG_SMP #include "pelt.h" -#include "sched-pelt.h" +#ifdef CONFIG_SMP static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); static unsigned long task_h_load(struct task_struct *p); @@ -763,7 +762,7 @@ void post_init_entity_util_avg(struct sched_entity *se) * such that the next switched_to_fair() has the * expected state. */ - se->avg.last_update_time = cfs_rq_clock_task(cfs_rq); + se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq); return; } } @@ -3109,7 +3108,7 @@ void set_task_rq_fair(struct sched_entity *se, p_last_update_time = prev->avg.last_update_time; n_last_update_time = next->avg.last_update_time; #endif - __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se); + __update_load_avg_blocked_se(p_last_update_time, se); se->avg.last_update_time = n_last_update_time; } @@ -3244,11 +3243,11 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* * runnable_sum can't be lower than running_sum - * As running sum is scale with CPU capacity wehreas the runnable sum - * is not we rescale running_sum 1st + * Rescale running sum to be in the same range as runnable sum + * running_sum is in [0 : LOAD_AVG_MAX << SCHED_CAPACITY_SHIFT] + * runnable_sum is in [0 : LOAD_AVG_MAX] */ - running_sum = se->avg.util_sum / - arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); + running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT; runnable_sum = max(runnable_sum, running_sum); load_sum = (s64)se_weight(se) * runnable_sum; @@ -3351,7 +3350,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum /** * update_cfs_rq_load_avg - update the cfs_rq's load/util averages - * @now: current time, as per cfs_rq_clock_task() + * @now: current time, as per cfs_rq_clock_pelt() * @cfs_rq: cfs_rq to update * * The cfs_rq avg is the direct sum of all its entities (blocked and runnable) @@ -3396,7 +3395,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) decayed = 1; } - decayed |= __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq); + decayed |= __update_load_avg_cfs_rq(now, cfs_rq); #ifndef CONFIG_64BIT smp_wmb(); @@ -3486,9 +3485,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s /* Update task and its cfs_rq load average */ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - u64 now = cfs_rq_clock_task(cfs_rq); - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); + u64 now = cfs_rq_clock_pelt(cfs_rq); int decayed; /* @@ -3496,7 +3493,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s * track group sched_entity load average for task_h_load calc in migration */ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) - __update_load_avg_se(now, cpu, cfs_rq, se); + __update_load_avg_se(now, cfs_rq, se); decayed = update_cfs_rq_load_avg(now, cfs_rq); decayed |= propagate_entity_load_avg(se); @@ -3548,7 +3545,7 @@ void sync_entity_load_avg(struct sched_entity *se) u64 last_update_time; last_update_time = cfs_rq_last_update_time(cfs_rq); - __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se); + __update_load_avg_blocked_se(last_update_time, se); } /* @@ -7015,6 +7012,12 @@ idle: if (new_tasks > 0) goto again; + /* + * rq is about to be idle, check if we need to update the + * lost_idle_time of clock_pelt + */ + update_idle_rq_clock_pelt(rq); + return NULL; } @@ -7657,7 +7660,7 @@ static void update_blocked_averages(int cpu) if (throttled_hierarchy(cfs_rq)) continue; - if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq)) + if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) update_tg_load_avg(cfs_rq, 0); /* Propagate pending load changes to the parent, if any: */ @@ -7671,8 +7674,8 @@ static void update_blocked_averages(int cpu) } curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); /* Don't need periodic decay once load/util_avg are null */ if (others_have_blocked(rq)) @@ -7742,11 +7745,11 @@ static inline void update_blocked_averages(int cpu) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq); + update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); curr_class = rq->curr->sched_class; - update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class); - update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); update_irq_load_avg(rq, 0); #ifdef CONFIG_NO_HZ_COMMON rq->last_blocked_load_update_tick = jiffies; diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 90fb5bc12ad4..befce29bd882 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -26,7 +26,6 @@ #include #include "sched.h" -#include "sched-pelt.h" #include "pelt.h" /* @@ -106,16 +105,12 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) * n=1 */ static __always_inline u32 -accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, +accumulate_sum(u64 delta, struct sched_avg *sa, unsigned long load, unsigned long runnable, int running) { - unsigned long scale_freq, scale_cpu; u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */ u64 periods; - scale_freq = arch_scale_freq_capacity(cpu); - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); - delta += sa->period_contrib; periods = delta / 1024; /* A period is 1024us (~1ms) */ @@ -137,13 +132,12 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, } sa->period_contrib = delta; - contrib = cap_scale(contrib, scale_freq); if (load) sa->load_sum += load * contrib; if (runnable) sa->runnable_load_sum += runnable * contrib; if (running) - sa->util_sum += contrib * scale_cpu; + sa->util_sum += contrib << SCHED_CAPACITY_SHIFT; return periods; } @@ -177,7 +171,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ static __always_inline int -___update_load_sum(u64 now, int cpu, struct sched_avg *sa, +___update_load_sum(u64 now, struct sched_avg *sa, unsigned long load, unsigned long runnable, int running) { u64 delta; @@ -221,7 +215,7 @@ ___update_load_sum(u64 now, int cpu, struct sched_avg *sa, * Step 1: accumulate *_sum since last_update_time. If we haven't * crossed period boundaries, finish. */ - if (!accumulate_sum(delta, cpu, sa, load, runnable, running)) + if (!accumulate_sum(delta, sa, load, runnable, running)) return 0; return 1; @@ -267,9 +261,9 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna * runnable_load_avg = \Sum se->avg.runable_load_avg */ -int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se) +int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) { - if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) { + if (___update_load_sum(now, &se->avg, 0, 0, 0)) { ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); return 1; } @@ -277,9 +271,9 @@ int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se) return 0; } -int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se) +int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq, + if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq, cfs_rq->curr == se)) { ___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); @@ -290,9 +284,9 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e return 0; } -int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq) +int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) { - if (___update_load_sum(now, cpu, &cfs_rq->avg, + if (___update_load_sum(now, &cfs_rq->avg, scale_load_down(cfs_rq->load.weight), scale_load_down(cfs_rq->runnable_weight), cfs_rq->curr != NULL)) { @@ -317,7 +311,7 @@ int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq) int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) { - if (___update_load_sum(now, rq->cpu, &rq->avg_rt, + if (___update_load_sum(now, &rq->avg_rt, running, running, running)) { @@ -340,7 +334,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) { - if (___update_load_sum(now, rq->cpu, &rq->avg_dl, + if (___update_load_sum(now, &rq->avg_dl, running, running, running)) { @@ -365,22 +359,31 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) int update_irq_load_avg(struct rq *rq, u64 running) { int ret = 0; + + /* + * We can't use clock_pelt because irq time is not accounted in + * clock_task. Instead we directly scale the running time to + * reflect the real amount of computation + */ + running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq))); + running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + /* * We know the time that has been used by interrupt since last update * but we don't when. Let be pessimistic and assume that interrupt has * happened just before the update. This is not so far from reality * because interrupt will most probably wake up task and trig an update - * of rq clock during which the metric si updated. + * of rq clock during which the metric is updated. * We start to decay with normal context time and then we add the * interrupt context time. * We can safely remove running from rq->clock because * rq->clock += delta with delta >= running */ - ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq, + ret = ___update_load_sum(rq->clock - running, &rq->avg_irq, 0, 0, 0); - ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq, + ret += ___update_load_sum(rq->clock, &rq->avg_irq, 1, 1, 1); diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 7e56b489ff32..7489d5f56960 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -1,8 +1,9 @@ #ifdef CONFIG_SMP +#include "sched-pelt.h" -int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se); -int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se); -int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq); +int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); +int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); +int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); @@ -42,6 +43,101 @@ static inline void cfs_se_util_change(struct sched_avg *avg) WRITE_ONCE(avg->util_est.enqueued, enqueued); } +/* + * The clock_pelt scales the time to reflect the effective amount of + * computation done during the running delta time but then sync back to + * clock_task when rq is idle. + * + * + * absolute time | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16 + * @ max capacity ------******---------------******--------------- + * @ half capacity ------************---------************--------- + * clock pelt | 1| 2| 3| 4| 7| 8| 9| 10| 11|14|15|16 + * + */ +static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) +{ + if (unlikely(is_idle_task(rq->curr))) { + /* The rq is idle, we can sync to clock_task */ + rq->clock_pelt = rq_clock_task(rq); + return; + } + + /* + * When a rq runs at a lower compute capacity, it will need + * more time to do the same amount of work than at max + * capacity. In order to be invariant, we scale the delta to + * reflect how much work has been really done. + * Running longer results in stealing idle time that will + * disturb the load signal compared to max capacity. This + * stolen idle time will be automatically reflected when the + * rq will be idle and the clock will be synced with + * rq_clock_task. + */ + + /* + * Scale the elapsed time to reflect the real amount of + * computation + */ + delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq))); + delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq))); + + rq->clock_pelt += delta; +} + +/* + * When rq becomes idle, we have to check if it has lost idle time + * because it was fully busy. A rq is fully used when the /Sum util_sum + * is greater or equal to: + * (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT; + * For optimization and computing rounding purpose, we don't take into account + * the position in the current window (period_contrib) and we use the higher + * bound of util_sum to decide. + */ +static inline void update_idle_rq_clock_pelt(struct rq *rq) +{ + u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX; + u32 util_sum = rq->cfs.avg.util_sum; + util_sum += rq->avg_rt.util_sum; + util_sum += rq->avg_dl.util_sum; + + /* + * Reflecting stolen time makes sense only if the idle + * phase would be present at max capacity. As soon as the + * utilization of a rq has reached the maximum value, it is + * considered as an always runnig rq without idle time to + * steal. This potential idle time is considered as lost in + * this case. We keep track of this lost idle time compare to + * rq's clock_task. + */ + if (util_sum >= divider) + rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt; +} + +static inline u64 rq_clock_pelt(struct rq *rq) +{ + lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + + return rq->clock_pelt - rq->lost_idle_time; +} + +#ifdef CONFIG_CFS_BANDWIDTH +/* rq->task_clock normalized against any time this cfs_rq has spent throttled */ +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + if (unlikely(cfs_rq->throttle_count)) + return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time; + + return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time; +} +#else +static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +{ + return rq_clock_pelt(rq_of(cfs_rq)); +} +#endif + #else static inline int @@ -67,6 +163,18 @@ update_irq_load_avg(struct rq *rq, u64 running) { return 0; } + +static inline u64 rq_clock_pelt(struct rq *rq) +{ + return rq_clock_task(rq); +} + +static inline void +update_rq_clock_pelt(struct rq *rq, s64 delta) { } + +static inline void +update_idle_rq_clock_pelt(struct rq *rq) { } + #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e4f398ad9e73..90fa23d36565 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1587,7 +1587,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * rt task */ if (rq->curr->sched_class != &rt_sched_class) - update_rt_rq_load_avg(rq_clock_task(rq), rq, 0); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0); return p; } @@ -1596,7 +1596,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); - update_rt_rq_load_avg(rq_clock_task(rq), rq, 1); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); /* * The previous task needs to be made eligible for pushing @@ -2325,7 +2325,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) struct sched_rt_entity *rt_se = &p->rt; update_curr_rt(rq); - update_rt_rq_load_avg(rq_clock_task(rq), rq, 1); + update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); watchdog(rq, p); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0ed130fae2a9..fe31bc472f3e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -861,7 +861,10 @@ struct rq { unsigned int clock_update_flags; u64 clock; - u64 clock_task; + /* Ensure that all clocks are in the same cache line */ + u64 clock_task ____cacheline_aligned; + u64 clock_pelt; + unsigned long lost_idle_time; atomic_t nr_iowait; -- cgit v1.2.3 From c546951d9c9300065bad253ecdf1ac59ce9d06c8 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 21 Jan 2019 16:52:40 +0100 Subject: sched/core: Use READ_ONCE()/WRITE_ONCE() in move_queued_task()/task_rq_lock() move_queued_task() synchronizes with task_rq_lock() as follows: move_queued_task() task_rq_lock() [S] ->on_rq = MIGRATING [L] rq = task_rq() WMB (__set_task_cpu()) ACQUIRE (rq->lock); [S] ->cpu = new_cpu [L] ->on_rq where "[L] rq = task_rq()" is ordered before "ACQUIRE (rq->lock)" by an address dependency and, in turn, "ACQUIRE (rq->lock)" is ordered before "[L] ->on_rq" by the ACQUIRE itself. Use READ_ONCE() to load ->cpu in task_rq() (c.f., task_cpu()) to honor this address dependency. Also, mark the accesses to ->cpu and ->on_rq with READ_ONCE()/WRITE_ONCE() to comply with the LKMM. Signed-off-by: Andrea Parri Signed-off-by: Peter Zijlstra (Intel) Cc: Alan Stern Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190121155240.27173-1-andrea.parri@amarulasolutions.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 9 +++++---- kernel/sched/sched.h | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 351c0fe64c85..4112639c2a85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1745,9 +1745,9 @@ static __always_inline bool need_resched(void) static inline unsigned int task_cpu(const struct task_struct *p) { #ifdef CONFIG_THREAD_INFO_IN_TASK - return p->cpu; + return READ_ONCE(p->cpu); #else - return task_thread_info(p)->cpu; + return READ_ONCE(task_thread_info(p)->cpu); #endif } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 32e06704565e..ec1b67a195cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -107,11 +107,12 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * [L] ->on_rq * RELEASE (rq->lock) * - * If we observe the old CPU in task_rq_lock, the acquire of + * If we observe the old CPU in task_rq_lock(), the acquire of * the old rq->lock will fully serialize against the stores. * - * If we observe the new CPU in task_rq_lock, the acquire will - * pair with the WMB to ensure we must then also see migrating. + * If we observe the new CPU in task_rq_lock(), the address + * dependency headed by '[L] rq = task_rq()' and the acquire + * will pair with the WMB to ensure we then also see migrating. */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { rq_pin_lock(rq, rf); @@ -916,7 +917,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, { lockdep_assert_held(&rq->lock); - p->on_rq = TASK_ON_RQ_MIGRATING; + WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); dequeue_task(rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, new_cpu); rq_unlock(rq, rf); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 99e2a7772d16..c688ef5012e5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1479,9 +1479,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) */ smp_wmb(); #ifdef CONFIG_THREAD_INFO_IN_TASK - p->cpu = cpu; + WRITE_ONCE(p->cpu, cpu); #else - task_thread_info(p)->cpu = cpu; + WRITE_ONCE(task_thread_info(p)->cpu, cpu); #endif p->wake_cpu = cpu; #endif @@ -1582,7 +1582,7 @@ static inline int task_on_rq_queued(struct task_struct *p) static inline int task_on_rq_migrating(struct task_struct *p) { - return p->on_rq == TASK_ON_RQ_MIGRATING; + return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; } /* -- cgit v1.2.3 From 77000bc43da17d5d6bc4ebfaf44d52d43bb69492 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Feb 2019 16:31:04 +0100 Subject: uio: remove the unused iov_for_each macro Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- .clang-format | 1 - include/linux/uio.h | 8 -------- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/.clang-format b/.clang-format index e6080f5834a3..c144d9c24d5d 100644 --- a/.clang-format +++ b/.clang-format @@ -259,7 +259,6 @@ ForEachMacros: - 'idr_for_each_entry_ul' - 'inet_bind_bucket_for_each' - 'inet_lhash2_for_each_icsk_rcu' - - 'iov_for_each' - 'key_for_each' - 'key_for_each_safe' - 'klp_for_each_func' diff --git a/include/linux/uio.h b/include/linux/uio.h index ecf584f6b82d..87477e1640f9 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -110,14 +110,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) }; } -#define iov_for_each(iov, iter, start) \ - if (iov_iter_type(start) == ITER_IOVEC || \ - iov_iter_type(start) == ITER_KVEC) \ - for (iter = (start); \ - (iter).count && \ - ((iov = iov_iter_iovec(&(iter))), 1); \ - iov_iter_advance(&(iter), (iov).iov_len)) - size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); -- cgit v1.2.3 From 960587285a56ec3cafb4d1e6b25c19eced4d0bce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:16:59 +0100 Subject: netfilter: nat: remove module dependency on ipv6 core nf_nat_ipv6 calls two ipv6 core functions, so add those to v6ops to avoid the module dependency. This is a prerequisite for merging ipv4 and ipv6 nat implementations. Add wrappers to avoid the indirection if ipv6 is builtin. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 6 ++++++ net/ipv6/netfilter.c | 4 ++++ net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 17 ++++++++++++++++- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 21 +++++++++++++++++++-- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..ad4223c10488 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -33,6 +33,12 @@ struct nf_ipv6_ops { int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); +#if IS_MODULE(CONFIG_IPV6) + int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*dev_get_saddr)(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr); +#endif }; #ifdef CONFIG_NETFILTER diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..0a5caf263889 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -112,6 +112,10 @@ static const struct nf_ipv6_ops ipv6ops = { .fragment = ip6_fragment, .route = nf_ip6_route, .reroute = nf_ip6_reroute, +#if IS_MODULE(CONFIG_IPV6) + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, +#endif }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..b52026adb3e7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } +static int nat_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -333,7 +348,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = nat_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 0ad0da5a2600..fd313b726263 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -24,6 +24,23 @@ static atomic_t v6_worker_count; +static int +nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#else + return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#endif +} + unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); -- cgit v1.2.3 From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:17:00 +0100 Subject: netfilter: ipv6: avoid indirect calls for IPV6=y case indirect calls are only needed if ipv6 is a module. Add helpers to abstract the v6ops indirections and use them instead. fragment, reroute and route_input are kept as indirect calls. The first two are not not used in hot path and route_input is only used by bridge netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 64 +++++++++++++++++++++++++++++++-------- net/ipv6/netfilter.c | 15 ++++----- net/ipv6/netfilter/nft_fib_ipv6.c | 9 ++---- net/netfilter/utils.c | 6 ++-- net/netfilter/xt_addrtype.c | 16 +++------- 5 files changed, 68 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index ad4223c10488..471e9467105b 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,29 +25,24 @@ struct nf_queue_entry; * if IPv6 is a module. */ struct nf_ipv6_ops { +#if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) int (*route_me_harder)(struct net *net, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); + int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict); #endif + void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); + int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); -__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +#include extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) @@ -55,6 +50,49 @@ static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) return rcu_dereference(nf_ipv6_ops); } +static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->chk_addr(net, addr, dev, strict); +#else + return ipv6_chk_addr(net, addr, dev, strict); +#endif +} + +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict); + +static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + if (v6ops) + return v6ops->route(net, dst, fl, strict); + + return -EHOSTUNREACH; +#endif +#if IS_BUILTIN(CONFIG_IPV6) + return __nf_ip6_route(net, dst, fl, strict); +#else + return -EHOSTUNREACH; +#endif +} + +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); + +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0a5caf263889..a8263031f3a6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .route = nf_ip6_route, - .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) + .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, #endif + .route_input = ip6_route_input, + .fragment = ip6_fragment, + .reroute = nf_ip6_reroute, }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 55af9f247993..06dc55590441 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops; + const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { @@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->route(net, dst, fl, strict); + ret = nf_ip6_route(net, dst, fl, strict); break; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 89e281b3bfc2..29987ff03621 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - v6ops = nf_get_ipv6_ops(); - if (v6ops) { - if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - if (v6ops->chk_addr(net, addr, dev, true)) - ret = XT_ADDRTYPE_LOCAL; - } - route_err = v6ops->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); - } else { - route_err = 1; + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + if (nf_ipv6_chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; } + route_err = nf_ip6_route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; -- cgit v1.2.3 From 278311e417be60f7caef6fcb12bda4da2711ceff Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Mon, 21 Jan 2019 17:59:29 +0800 Subject: kexec, KEYS: Make use of platform keyring for signature verify This patch allows the kexec_file_load syscall to verify the PE signed kernel image signature based on the preboot keys stored in the .platform keyring, as fall back, if the signature verification failed due to not finding the public key in the secondary or builtin keyrings. This commit adds a VERIFY_USE_PLATFORM_KEYRING similar to previous VERIFY_USE_SECONDARY_KEYRING indicating that verify_pkcs7_signature should verify the signature using platform keyring. Also, decrease the error message log level when verification failed with -ENOKEY, so that if called tried multiple time with different keyring it won't generate extra noises. Signed-off-by: Kairui Song Cc: David Howells Acked-by: Dave Young (for kexec_file_load part) [zohar@linux.ibm.com: tweaked the first paragraph of the patch description, and fixed checkpatch warning.] Signed-off-by: Mimi Zohar --- arch/x86/kernel/kexec-bzimage64.c | 14 +++++++++++--- certs/system_keyring.c | 13 ++++++++++++- include/linux/verification.h | 1 + 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 278cd07228dd..e1215a600064 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -531,9 +531,17 @@ static int bzImage64_cleanup(void *loader_data) #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { - return verify_pefile_signature(kernel, kernel_len, - VERIFY_USE_SECONDARY_KEYRING, - VERIFYING_KEXEC_PE_SIGNATURE); + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + } + return ret; } #endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index da055e901df4..c05c29ae4d5d 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -240,11 +240,22 @@ int verify_pkcs7_signature(const void *data, size_t len, #else trusted_keys = builtin_trusted_keys; #endif + } else if (trusted_keys == VERIFY_USE_PLATFORM_KEYRING) { +#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING + trusted_keys = platform_trusted_keys; +#else + trusted_keys = NULL; +#endif + if (!trusted_keys) { + ret = -ENOKEY; + pr_devel("PKCS#7 platform keyring is not available\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { if (ret == -ENOKEY) - pr_err("PKCS#7 signature not signed with a trusted key\n"); + pr_devel("PKCS#7 signature not signed with a trusted key\n"); goto error; } diff --git a/include/linux/verification.h b/include/linux/verification.h index cfa4730d607a..018fb5f13d44 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -17,6 +17,7 @@ * should be used. */ #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) +#define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL) /* * The use to which an asymmetric key is being put. -- cgit v1.2.3 From fdb2410f7702f25f82804a261f90ad03422bd2c3 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 22 Jan 2019 14:06:49 -0600 Subject: ima: define ima_post_create_tmpfile() hook and add missing call If tmpfiles can be made persistent, then newly created tmpfiles need to be treated like any other new files in policy. This patch indicates which newly created tmpfiles are in policy, causing the file hash to be calculated on __fput(). Reported-by: Ignaz Forster [rgoldwyn@suse.com: Call ima_post_create_tmpfile() in vfs_tmpfile() as opposed to do_tmpfile(). This will help the case for overlayfs where copy_up is denied while overwriting a file.] Signed-off-by: Goldwyn Rodrigues Signed-off-by: Mimi Zohar --- fs/namei.c | 1 + include/linux/ima.h | 5 +++++ security/integrity/ima/ima_main.c | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 914178cdbe94..373a7ec4b09d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3462,6 +3462,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } + ima_post_create_tmpfile(inode); return child; out_err: diff --git a/include/linux/ima.h b/include/linux/ima.h index b5e16b8c50b7..dc12fbcf484c 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,6 +18,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask); +extern void ima_post_create_tmpfile(struct inode *inode); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_load_data(enum kernel_load_data_id id); @@ -56,6 +57,10 @@ static inline int ima_file_check(struct file *file, int mask) return 0; } +static inline void ima_post_create_tmpfile(struct inode *inode) +{ +} + static inline void ima_file_free(struct file *file) { return; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 4ffac4f5c647..357edd140c09 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -396,6 +396,33 @@ int ima_file_check(struct file *file, int mask) } EXPORT_SYMBOL_GPL(ima_file_check); +/** + * ima_post_create_tmpfile - mark newly created tmpfile as new + * @file : newly created tmpfile + * + * No measuring, appraising or auditing of newly created tmpfiles is needed. + * Skip calling process_measurement(), but indicate which newly, created + * tmpfiles are in policy. + */ +void ima_post_create_tmpfile(struct inode *inode) +{ + struct integrity_iint_cache *iint; + int must_appraise; + + must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK); + if (!must_appraise) + return; + + /* Nothing to do if we can't allocate memory */ + iint = integrity_inode_get(inode); + if (!iint) + return; + + /* needed for writing the security xattrs */ + set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); + iint->ima_file_status = INTEGRITY_PASS; +} + /** * ima_post_path_mknod - mark as a new inode * @dentry: newly created dentry @@ -413,9 +440,13 @@ void ima_post_path_mknod(struct dentry *dentry) if (!must_appraise) return; + /* Nothing to do if we can't allocate memory */ iint = integrity_inode_get(inode); - if (iint) - iint->flags |= IMA_NEW_FILE; + if (!iint) + return; + + /* needed for re-opening empty files */ + iint->flags |= IMA_NEW_FILE; } /** -- cgit v1.2.3 From 5468e82f7034f0ae175a3ce075441356099bdaa3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 4 Feb 2019 11:26:18 +0100 Subject: net: phy: fixed-phy: Drop GPIO from fixed_phy_add() All users of the fixed_phy_add() pass -1 as GPIO number to the fixed phy driver, and all users of fixed_phy_register() pass -1 as GPIO number as well, except for the device tree MDIO bus. Any new users should create a proper device and pass the GPIO as a descriptor associated with the device so delete the GPIO argument from the calls and drop the code looking requesting a GPIO in fixed_phy_add(). In fixed phy_register(), investigate the "fixed-link" node and pick the GPIO descriptor from "link-gpios" if this property exists. Move the corresponding code out of of_mdio.c as the fixed phy code anyways requires OF to be in use. Tested-by: Andrew Lunn Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- .../networking/device_drivers/stmicro/stmmac.txt | 2 +- arch/m68k/coldfire/m5272.c | 2 +- arch/mips/ar7/platform.c | 4 +- arch/mips/bcm47xx/setup.c | 2 +- drivers/net/dsa/dsa_loop.c | 2 +- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/phy/fixed_phy.c | 82 ++++++++++++++++------ drivers/net/usb/lan78xx.c | 3 +- drivers/of/of_mdio.c | 9 +-- include/linux/phy_fixed.h | 8 +-- 11 files changed, 72 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.txt b/Documentation/networking/device_drivers/stmicro/stmmac.txt index 2bb07078f535..1ae979fd90d2 100644 --- a/Documentation/networking/device_drivers/stmicro/stmmac.txt +++ b/Documentation/networking/device_drivers/stmicro/stmmac.txt @@ -267,7 +267,7 @@ static struct fixed_phy_status stmmac0_fixed_phy_status = { During the board's device_init we can configure the first MAC for fixed_link by calling: - fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status, -1); + fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status); and the second one, with a real PHY device attached to the bus, by using the stmmac_mdio_bus_data structure (to provide the id, the reset procedure etc). diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index ad1185c68df7..6b3ab583c698 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -127,7 +127,7 @@ static struct fixed_phy_status nettel_fixed_phy_status __initdata = { static int __init init_BSP(void) { m5272_uarts_init(); - fixed_phy_add(PHY_POLL, 0, &nettel_fixed_phy_status, -1); + fixed_phy_add(PHY_POLL, 0, &nettel_fixed_phy_status); return 0; } diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index f09262e0a72f..10ff07b7721e 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -683,7 +683,7 @@ static int __init ar7_register_devices(void) if (ar7_has_high_cpmac()) { res = fixed_phy_add(PHY_POLL, cpmac_high.id, - &fixed_phy_status, -1); + &fixed_phy_status); if (!res) { cpmac_get_mac(1, cpmac_high_data.dev_addr); @@ -696,7 +696,7 @@ static int __init ar7_register_devices(void) } else cpmac_low_data.phy_mask = 0xffffffff; - res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status, -1); + res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); if (!res) { cpmac_get_mac(0, cpmac_low_data.dev_addr); res = platform_device_register(&cpmac_low); diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index fe3773539eff..82627c264964 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -274,7 +274,7 @@ static int __init bcm47xx_register_bus_complete(void) bcm47xx_leds_register(); bcm47xx_workarounds(); - fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status, -1); + fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status); return 0; } device_initcall(bcm47xx_register_bus_complete); diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 816f34d64736..17482ae09aa5 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -343,7 +343,7 @@ static int __init dsa_loop_init(void) unsigned int i; for (i = 0; i < NUM_FIXED_PHYS; i++) - phydevs[i] = fixed_phy_register(PHY_POLL, &status, -1, NULL); + phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); return mdio_driver_register(&dsa_loop_drv); } diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 2d3a44c40221..4632dd5dbad1 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1446,7 +1446,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac) struct phy_device *phy_dev; int err; - phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL); + phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); if (!phy_dev || IS_ERR(phy_dev)) { dev_err(bgmac->dev, "Failed to register fixed PHY device\n"); return -ENODEV; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index aceb9b7b55bd..51880d83131a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -525,7 +525,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) .asym_pause = 0, }; - phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL); + phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); if (!phydev || IS_ERR(phydev)) { dev_err(kdev, "failed to register fixed PHY device\n"); return -ENODEV; diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 47a8cb574c45..f136a23c1a35 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,7 @@ struct fixed_phy { bool no_carrier; int (*link_update)(struct net_device *, struct fixed_phy_status *); struct list_head node; - int link_gpio; + struct gpio_desc *link_gpiod; }; static struct platform_device *pdev; @@ -67,8 +67,8 @@ EXPORT_SYMBOL_GPL(fixed_phy_change_carrier); static void fixed_phy_update(struct fixed_phy *fp) { - if (!fp->no_carrier && gpio_is_valid(fp->link_gpio)) - fp->status.link = !!gpio_get_value_cansleep(fp->link_gpio); + if (!fp->no_carrier && fp->link_gpiod) + fp->status.link = !!gpiod_get_value_cansleep(fp->link_gpiod); } static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) @@ -133,9 +133,9 @@ int fixed_phy_set_link_update(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(fixed_phy_set_link_update); -int fixed_phy_add(unsigned int irq, int phy_addr, - struct fixed_phy_status *status, - int link_gpio) +static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr, + struct fixed_phy_status *status, + struct gpio_desc *gpiod) { int ret; struct fixed_mdio_bus *fmb = &platform_fmb; @@ -156,24 +156,19 @@ int fixed_phy_add(unsigned int irq, int phy_addr, fp->addr = phy_addr; fp->status = *status; - fp->link_gpio = link_gpio; - - if (gpio_is_valid(fp->link_gpio)) { - ret = gpio_request_one(fp->link_gpio, GPIOF_DIR_IN, - "fixed-link-gpio-link"); - if (ret) - goto err_regs; - } + fp->link_gpiod = gpiod; fixed_phy_update(fp); list_add_tail(&fp->node, &fmb->phys); return 0; +} -err_regs: - kfree(fp); - return ret; +int fixed_phy_add(unsigned int irq, int phy_addr, + struct fixed_phy_status *status) { + + return fixed_phy_add_gpiod(irq, phy_addr, status, NULL); } EXPORT_SYMBOL_GPL(fixed_phy_add); @@ -187,8 +182,8 @@ static void fixed_phy_del(int phy_addr) list_for_each_entry_safe(fp, tmp, &fmb->phys, node) { if (fp->addr == phy_addr) { list_del(&fp->node); - if (gpio_is_valid(fp->link_gpio)) - gpio_free(fp->link_gpio); + if (fp->link_gpiod) + gpiod_put(fp->link_gpiod); kfree(fp); ida_simple_remove(&phy_fixed_ida, phy_addr); return; @@ -196,12 +191,50 @@ static void fixed_phy_del(int phy_addr) } } +#ifdef CONFIG_OF_GPIO +static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) +{ + struct device_node *fixed_link_node; + struct gpio_desc *gpiod; + + if (!np) + return NULL; + + fixed_link_node = of_get_child_by_name(np, "fixed-link"); + if (!fixed_link_node) + return NULL; + + /* + * As the fixed link is just a device tree node without any + * Linux device associated with it, we simply have obtain + * the GPIO descriptor from the device tree like this. + */ + gpiod = gpiod_get_from_of_node(fixed_link_node, "link-gpios", 0, + GPIOD_IN, "mdio"); + of_node_put(fixed_link_node); + if (IS_ERR(gpiod)) { + if (PTR_ERR(gpiod) == -EPROBE_DEFER) + return gpiod; + pr_err("error getting GPIO for fixed link %pOF, proceed without\n", + fixed_link_node); + gpiod = NULL; + } + + return gpiod; +} +#else +static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) +{ + return NULL; +} +#endif + struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, - int link_gpio, struct device_node *np) { struct fixed_mdio_bus *fmb = &platform_fmb; + struct gpio_desc *gpiod = NULL; struct phy_device *phy; int phy_addr; int ret; @@ -209,12 +242,17 @@ struct phy_device *fixed_phy_register(unsigned int irq, if (!fmb->mii_bus || fmb->mii_bus->state != MDIOBUS_REGISTERED) return ERR_PTR(-EPROBE_DEFER); + /* Check if we have a GPIO associated with this fixed phy */ + gpiod = fixed_phy_get_gpiod(np); + if (IS_ERR(gpiod)) + return ERR_CAST(gpiod); + /* Get the next available PHY address, up to PHY_MAX_ADDR */ phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); if (phy_addr < 0) return ERR_PTR(phy_addr); - ret = fixed_phy_add(irq, phy_addr, status, link_gpio); + ret = fixed_phy_add_gpiod(irq, phy_addr, status, gpiod); if (ret < 0) { ida_simple_remove(&phy_fixed_ida, phy_addr); return ERR_PTR(ret); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index e96bc0c6140f..3d92ea6fcc02 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2051,8 +2051,7 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) phydev = phy_find_first(dev->mdiobus); if (!phydev) { netdev_dbg(dev->net, "PHY Not Found!! Registering Fixed PHY\n"); - phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, - NULL); + phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); if (IS_ERR(phydev)) { netdev_err(dev->net, "No PHY/fixed_PHY found\n"); return NULL; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 5ad1342f5682..de6157357e26 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -463,7 +462,6 @@ int of_phy_register_fixed_link(struct device_node *np) struct device_node *fixed_link_node; u32 fixed_link_prop[5]; const char *managed; - int link_gpio = -1; if (of_property_read_string(np, "managed", &managed) == 0 && strcmp(managed, "in-band-status") == 0) { @@ -485,11 +483,7 @@ int of_phy_register_fixed_link(struct device_node *np) status.pause = of_property_read_bool(fixed_link_node, "pause"); status.asym_pause = of_property_read_bool(fixed_link_node, "asym-pause"); - link_gpio = of_get_named_gpio_flags(fixed_link_node, - "link-gpios", 0, NULL); of_node_put(fixed_link_node); - if (link_gpio == -EPROBE_DEFER) - return -EPROBE_DEFER; goto register_phy; } @@ -508,8 +502,7 @@ int of_phy_register_fixed_link(struct device_node *np) return -ENODEV; register_phy: - return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, link_gpio, - np)); + return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, np)); } EXPORT_SYMBOL(of_phy_register_fixed_link); diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 9525567b1951..c78fc203db43 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -15,11 +15,9 @@ struct device_node; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); extern int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status, - int link_gpio); + struct fixed_phy_status *status); extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, - int link_gpio, struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, @@ -27,14 +25,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_phy_status *)); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status, - int link_gpio) + struct fixed_phy_status *status) { return -ENODEV; } static inline struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, - int gpio_link, struct device_node *np) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From 809ab9371ca0a96b44d9866ad82849410759a45b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 26 Jan 2019 00:52:26 -0500 Subject: XArray: Update xa_erase family descriptions xa_erase does not allocate memory and doesn't have a gfp parameter. Update the descriptions of all four variants to be more useful. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 12 ++++++------ lib/xarray.c | 17 ++++++++--------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5d9d318bcf7a..e11841537631 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -526,9 +526,9 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. @@ -550,9 +550,9 @@ static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. diff --git a/lib/xarray.c b/lib/xarray.c index 81c3171ddde9..fb783bf2a441 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1294,13 +1294,12 @@ static void *xas_result(struct xa_state *xas, void *curr) * @xa: XArray. * @index: Index into array. * - * If the entry at this index is a multi-index entry then all indices will - * be erased, and the entry will no longer be a multi-index entry. - * This function expects the xa_lock to be held on entry. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * - * Context: Any context. Expects xa_lock to be held on entry. May - * release and reacquire xa_lock if @gfp flags permit. - * Return: The old entry at this index. + * Context: Any context. Expects xa_lock to be held on entry. + * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { @@ -1314,9 +1313,9 @@ EXPORT_SYMBOL(__xa_erase); * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. -- cgit v1.2.3 From fe6f42cf6eb3183ebd6ab6b0b7dcbee2600c2baa Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Wed, 6 Feb 2019 16:37:19 +0530 Subject: firmware: xilinx: Add zynqmp_pm_get_chipid() API This patch adds a new API to provide access to the hardware related data like soc revision, IDCODE... etc. Signed-off-by: Nava kishore Manne Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 24 ++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 70b50377ae5f..16a23bc4c2c3 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -186,6 +186,29 @@ static int zynqmp_pm_get_api_version(u32 *version) return ret; } +/** + * zynqmp_pm_get_chipid - Get silicon ID registers + * @idcode: IDCODE register + * @version: version register + * + * Return: Returns the status of the operation and the idcode and version + * registers in @idcode and @version. + */ +static int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!idcode || !version) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_GET_CHIPID, 0, 0, 0, 0, ret_payload); + *idcode = ret_payload[1]; + *version = ret_payload[2]; + + return ret; +} + /** * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version * @version: Returned version value @@ -509,6 +532,7 @@ static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, + .get_chipid = zynqmp_pm_get_chipid, .query_data = zynqmp_pm_query_data, .clock_enable = zynqmp_pm_clock_enable, .clock_disable = zynqmp_pm_clock_disable, diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 07c587a0b06e..5a1f19848100 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -36,6 +36,7 @@ enum pm_api_id { PM_GET_API_VERSION = 1, PM_RESET_ASSERT = 17, PM_RESET_GET_STATUS, + PM_GET_CHIPID = 24, PM_IOCTL = 34, PM_QUERY_DATA, PM_CLOCK_ENABLE, @@ -224,6 +225,7 @@ struct zynqmp_pm_query_data { struct zynqmp_eemi_ops { int (*get_api_version)(u32 *version); + int (*get_chipid)(u32 *idcode, u32 *version); int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out); int (*clock_enable)(u32 clock_id); int (*clock_disable)(u32 clock_id); -- cgit v1.2.3 From 2292822e1576c89191a65c3d0da584d75d3c033f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 19 Jan 2019 13:16:53 +0100 Subject: i2c: algo-bit: include main i2c header We are using symbols from it, so we should include it directly. Found after sorting includes in a driver. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Wolfram Sang --- include/linux/i2c-algo-bit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index 63904ba6887e..d64cebc6e65a 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -25,6 +25,8 @@ #ifndef _LINUX_I2C_ALGO_BIT_H #define _LINUX_I2C_ALGO_BIT_H +#include + /* --- Defines for bit-adapters --------------------------------------- */ /* * This struct contains the hw-dependent functions of bit-style adapters to -- cgit v1.2.3 From 738ac0679b969776a638daf2cfb5011049d467da Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 19 Jan 2019 13:16:54 +0100 Subject: i2c: algo-bit: convert to SPDX header And use kernel style for the remaining comments in the header. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-bit.c | 25 ++++++++----------------- include/linux/i2c-algo-bit.h | 31 ++++++++----------------------- 2 files changed, 16 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index c33dcfb87993..5e5990a83da5 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -1,21 +1,12 @@ -/* ------------------------------------------------------------------------- - * i2c-algo-bit.c i2c driver algorithms for bit-shift adapters - * ------------------------------------------------------------------------- +// SPDX-License-Identifier: GPL-2.0+ +/* + * i2c-algo-bit.c: i2c driver algorithms for bit-shift adapters + * * Copyright (C) 1995-2000 Simon G. Vogl - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - * ------------------------------------------------------------------------- */ - -/* With some changes from Frodo Looijaard , Kyösti Mälkki - and Jean Delvare */ + * + * With some changes from Frodo Looijaard , Kyösti Mälkki + * and Jean Delvare + */ #include #include diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index d64cebc6e65a..69045df78e2d 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -1,26 +1,11 @@ -/* ------------------------------------------------------------------------- */ -/* i2c-algo-bit.h i2c driver algorithms for bit-shift adapters */ -/* ------------------------------------------------------------------------- */ -/* Copyright (C) 1995-99 Simon G. Vogl - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301 USA. */ -/* ------------------------------------------------------------------------- */ - -/* With some changes from Kyösti Mälkki and even - Frodo Looijaard */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * i2c-algo-bit.h: i2c driver algorithms for bit-shift adapters + * + * Copyright (C) 1995-99 Simon G. Vogl + * With some changes from Kyösti Mälkki and even + * Frodo Looijaard + */ #ifndef _LINUX_I2C_ALGO_BIT_H #define _LINUX_I2C_ALGO_BIT_H -- cgit v1.2.3 From b525903c254dab2491410f0f23707691b7c2c317 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:53:58 +0000 Subject: genirq: Provide basic NMI management for interrupt lines Add functionality to allocate interrupt lines that will deliver IRQs as Non-Maskable Interrupts. These allocations are only successful if the irqchip provides the necessary support and allows NMI delivery for the interrupt line. Interrupt lines allocated for NMI delivery must be enabled/disabled through enable_nmi/disable_nmi_nosync to keep their state consistent. To treat a PERCPU IRQ as NMI, the interrupt must not be shared nor threaded, the irqchip directly managing the IRQ must be the root irqchip and the irqchip cannot be behind a slow bus. Signed-off-by: Julien Thierry Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Marc Zyngier Signed-off-by: Marc Zyngier --- include/linux/interrupt.h | 9 ++ include/linux/irq.h | 7 ++ kernel/irq/debugfs.c | 6 +- kernel/irq/internals.h | 2 + kernel/irq/manage.c | 228 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 249 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c672f34235e7..9941d1a8d83c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -156,6 +156,10 @@ __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void __percpu *percpu_dev_id); +extern int __must_check +request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, + const char *name, void *dev); + static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) @@ -167,6 +171,8 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); +extern const void *free_nmi(unsigned int irq, void *dev_id); + struct device; extern int __must_check @@ -217,6 +223,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); +extern void disable_nmi_nosync(unsigned int irq); +extern void enable_nmi(unsigned int irq); + /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); diff --git a/include/linux/irq.h b/include/linux/irq.h index def2b2aac8b1..a7298e4998c8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -442,6 +442,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine * @ipi_send_single: send a single IPI to destination cpus * @ipi_send_mask: send an IPI to destination cpus in cpumask + * @irq_nmi_setup: function called from core code before enabling an NMI + * @irq_nmi_teardown: function called from core code after disabling an NMI * @flags: chip specific flags */ struct irq_chip { @@ -490,6 +492,9 @@ struct irq_chip { void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); + int (*irq_nmi_setup)(struct irq_data *data); + void (*irq_nmi_teardown)(struct irq_data *data); + unsigned long flags; }; @@ -505,6 +510,7 @@ struct irq_chip { * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs + * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -515,6 +521,7 @@ enum { IRQCHIP_ONESHOT_SAFE = (1 << 5), IRQCHIP_EOI_THREADED = (1 << 6), IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), + IRQCHIP_SUPPORTS_NMI = (1 << 8), }; #include diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 6f636136cccc..59a04d2a66df 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -56,6 +56,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), }; static void @@ -140,6 +141,7 @@ static const struct irq_bit_descr irqdesc_istates[] = { BIT_MASK_DESCR(IRQS_WAITING), BIT_MASK_DESCR(IRQS_PENDING), BIT_MASK_DESCR(IRQS_SUSPENDED), + BIT_MASK_DESCR(IRQS_NMI), }; @@ -203,8 +205,8 @@ static ssize_t irq_debug_write(struct file *file, const char __user *user_buf, chip_bus_lock(desc); raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_settings_is_level(desc)) { - /* Can't do level, sorry */ + if (irq_settings_is_level(desc) || desc->istate & IRQS_NMI) { + /* Can't do level nor NMIs, sorry */ err = -EINVAL; } else { desc->istate |= IRQS_PENDING; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ca6afa267070..2a77cdd27ca9 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -49,6 +49,7 @@ enum { * IRQS_WAITING - irq is waiting * IRQS_PENDING - irq is pending and replayed later * IRQS_SUSPENDED - irq is suspended + * IRQS_NMI - irq line is used to deliver NMIs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -60,6 +61,7 @@ enum { IRQS_PENDING = 0x00000200, IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, }; #include "debug.h" diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a4888ce4667a..9472ae987946 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -341,7 +341,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) /* The release function is promised process context */ might_sleep(); - if (!desc) + if (!desc || desc->istate & IRQS_NMI) return -EINVAL; /* Complete initialisation of *notify */ @@ -550,6 +550,21 @@ bool disable_hardirq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_hardirq); +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are + * nested. + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} + void __enable_irq(struct irq_desc *desc) { switch (desc->depth) { @@ -606,6 +621,20 @@ out: } EXPORT_SYMBOL(enable_irq); +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + static int set_irq_wake_real(unsigned int irq, unsigned int on) { struct irq_desc *desc = irq_to_desc(irq); @@ -641,6 +670,12 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) if (!desc) return -EINVAL; + /* Don't use NMIs as wake up interrupts please */ + if (desc->istate & IRQS_NMI) { + ret = -EINVAL; + goto out_unlock; + } + /* wakeup-capable irqs can be shared between drivers that * don't need to have the same sleep mode behaviors. */ @@ -663,6 +698,8 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); } } + +out_unlock: irq_put_desc_busunlock(desc, flags); return ret; } @@ -1125,6 +1162,39 @@ static void irq_release_resources(struct irq_desc *desc) c->irq_release_resources(d); } +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + static int setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) { @@ -1299,9 +1369,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. */ unsigned int oldtype; + if (desc->istate & IRQS_NMI) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. @@ -1753,6 +1831,59 @@ const void *free_irq(unsigned int irq, void *dev_id) } EXPORT_SYMBOL(free_irq); +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + const void *devname; + + if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + raw_spin_lock_irqsave(&desc->lock, flags); + + irq_nmi_teardown(desc); + devname = __cleanup_nmi(irq, desc); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return devname; +} + /** * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate @@ -1922,6 +2053,101 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(request_any_context_irq); +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It sets up the IRQ line + * to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail and return a negative value. + */ +int request_nmi(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *name, void *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + /* NMI cannot be shared, used for Polling */ + if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) + return -EINVAL; + + if (!(irqflags & IRQF_PERCPU)) + return -EINVAL; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || irq_settings_can_autoenable(desc) || + !irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc)) || + !irq_supports_nmi(desc)) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; + action->name = name; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + return -EINVAL; + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + void enable_percpu_irq(unsigned int irq, unsigned int type) { unsigned int cpu = smp_processor_id(); -- cgit v1.2.3 From 4b078c3f1a26487c39363089ba0d5c6b09f2a89f Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:53:59 +0000 Subject: genirq: Provide NMI management for percpu_devid interrupts Add support for percpu_devid interrupts treated as NMIs. Percpu_devid NMIs need to be setup/torn down on each CPU they target. The same restrictions as for global NMIs still apply for percpu_devid NMIs. Signed-off-by: Julien Thierry Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Marc Zyngier Signed-off-by: Marc Zyngier --- include/linux/interrupt.h | 9 +++ kernel/irq/manage.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9941d1a8d83c..831ddcdc5597 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -168,10 +168,15 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, devname, percpu_dev_id); } +extern int __must_check +request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *devname, void __percpu *dev); + extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); extern const void *free_nmi(unsigned int irq, void *dev_id); +extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); struct device; @@ -224,7 +229,11 @@ extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); extern void disable_nmi_nosync(unsigned int irq); +extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); +extern void enable_percpu_nmi(unsigned int irq, unsigned int type); +extern int prepare_percpu_nmi(unsigned int irq); +extern void teardown_percpu_nmi(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9472ae987946..0a1ebc004a59 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2182,6 +2182,11 @@ out: } EXPORT_SYMBOL_GPL(enable_percpu_irq); +void enable_percpu_nmi(unsigned int irq, unsigned int type) +{ + enable_percpu_irq(irq, type); +} + /** * irq_percpu_is_enabled - Check whether the per cpu irq is enabled * @irq: Linux irq number to check for @@ -2221,6 +2226,11 @@ void disable_percpu_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_percpu_irq); +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + /* * Internal function to unregister a percpu irqaction. */ @@ -2252,6 +2262,8 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_ /* Found it - now remove it from the list of entries: */ desc->action = NULL; + desc->istate &= ~IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -2305,6 +2317,19 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) } EXPORT_SYMBOL_GPL(free_percpu_irq); +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + return; + + kfree(__free_percpu_irq(irq, dev_id)); +} + /** * setup_percpu_irq - setup a per-cpu interrupt * @irq: Interrupt line to setup @@ -2394,6 +2419,158 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(__request_percpu_irq); +/** + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling ready_percpu_nmi() before being + * enabled on the same CPU by using enable_percpu_nmi(). + * + * Dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *name, void __percpu *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc) || + irq_settings_can_autoenable(desc) || + !irq_supports_nmi(desc)) + return -EINVAL; + + /* The line cannot already be NMI */ + if (desc->istate & IRQS_NMI) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD + | IRQF_NOBALANCING; + action->name = name; + action->percpu_dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc->istate |= IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +/** + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery + * + * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). + * + * As a CPU local operation, this should be called from non-preemptible + * context. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int prepare_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + int ret = 0; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return -EINVAL; + + if (WARN(!(desc->istate & IRQS_NMI), + KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", + irq)) { + ret = -EINVAL; + goto out; + } + + ret = irq_nmi_setup(desc); + if (ret) { + pr_err("Failed to setup NMI delivery: irq %u\n", irq); + goto out; + } + +out: + irq_put_desc_unlock(desc, flags); + return ret; +} + +/** + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be + * removed + * + * This call undoes the setup done by prepare_percpu_nmi(). + * + * IRQ line should not be enabled for the current CPU. + * + * As a CPU local operation, this should be called from non-preemptible + * context. + */ +void teardown_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + goto out; + + irq_nmi_teardown(desc); +out: + irq_put_desc_unlock(desc, flags); +} + /** * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM -- cgit v1.2.3 From 2dcf1fbcad352baaa5f47b17e57c5743c8eedbad Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:54:00 +0000 Subject: genirq: Provide NMI handlers Provide flow handlers that are NMI safe for interrupts and percpu_devid interrupts. Signed-off-by: Julien Thierry Acked-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Peter Zijlstra Signed-off-by: Marc Zyngier --- include/linux/irq.h | 3 +++ kernel/irq/chip.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a7298e4998c8..5e91f6bcaacd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -601,6 +601,9 @@ extern void handle_percpu_devid_irq(struct irq_desc *desc); extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); +extern void handle_fasteoi_nmi(struct irq_desc *desc); +extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); + extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); extern int irq_chip_pm_put(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 34e969069488..c32d5f386f68 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -729,6 +729,37 @@ out: } EXPORT_SYMBOL_GPL(handle_fasteoi_irq); +/** + * handle_fasteoi_nmi - irq handler for NMI interrupt lines + * @desc: the interrupt description structure for this irq + * + * A simple NMI-safe handler, considering the restrictions + * from request_nmi. + * + * Only a single callback will be issued to the chip: an ->eoi() + * call when the interrupt has been serviced. This enables support + * for modern forms of interrupt handlers, which handle the flow + * details in hardware, transparently. + */ +void handle_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + /* + * NMIs cannot be shared, there is only one action. + */ + res = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); + /** * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq @@ -908,6 +939,29 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } +/** + * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu + * dev ids + * @desc: the interrupt description structure for this irq + * + * Similar to handle_fasteoi_nmi, but handling the dev_id cookie + * as a percpu pointer. + */ +void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) -- cgit v1.2.3 From 6e4933a006616343f66c4702dc4fc56bb25e7b02 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:54:01 +0000 Subject: irqdesc: Add domain handler for NMIs NMI handling code should be executed between calls to nmi_enter and nmi_exit. Add a separate domain handler to properly setup NMI context when handling an interrupt requested as NMI. Signed-off-by: Julien Thierry Acked-by: Marc Zyngier Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Will Deacon Cc: Peter Zijlstra Signed-off-by: Marc Zyngier --- include/linux/irqdesc.h | 5 +++++ kernel/irq/irqdesc.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd1e40ddac7d..ba05b0d6401a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -171,6 +171,11 @@ static inline int handle_domain_irq(struct irq_domain *domain, { return __handle_domain_irq(domain, hwirq, true, regs); } + +#ifdef CONFIG_IRQ_DOMAIN +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs); +#endif #endif /* Test to see if a driver has successfully requested an irq */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index ee062b7939d3..a1d7a7d484e0 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -669,6 +669,41 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, set_irq_regs(old_regs); return ret; } + +#ifdef CONFIG_IRQ_DOMAIN +/** + * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq; + int ret = 0; + + nmi_enter(); + + irq = irq_find_mapping(domain, hwirq); + + /* + * ack_bad_irq is not NMI-safe, just report + * an invalid interrupt. + */ + if (likely(irq)) + generic_handle_irq(irq); + else + ret = -EINVAL; + + nmi_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif #endif /* Dynamic interrupt handling */ -- cgit v1.2.3 From 013e6292aaf5e4b083a50a0f9e17e93628616860 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 20 Nov 2018 11:57:20 +0100 Subject: mtd: rawnand: Simplify the locking nand_get_device() was complex for apparently no good reason. Let's replace this locking scheme with 2 mutexes: one attached to the controller and another one attached to the chip. Every time the core calls nand_get_device(), it will first lock the chip and if the chip is not suspended, will then lock the controller. nand_release_device() will release both lock in the reverse order. nand_get_device() can sleep, just like the previous implementation, which means you should never call that from an atomic context. We also get rid of - the chip->state field, since all it was used for was flagging the chip as suspended. We replace it by a field called chip->suspended and directly set it from nand_suspend/resume() - the controller->wq and controller->active fields which are no longer needed since the new controller->lock (now a mutex) guarantees that all operations are serialized at the controller level - panic_nand_get_device() which would anyway be a no-op. Talking about panic write, I keep thinking the rawnand implementation is unsafe because there's not negotiation with the controller to know when it's actually done with it's previous operation. I don't intend to fix that here, but that's probably something we should look at, or maybe we should consider dropping the ->_panic_write() implementation Last important change to mention: we now return -EBUSY when someone tries to access a device that as been suspended, and propagate this error to the upper layer. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 111 ++++++++++++++++----------------------- include/linux/mtd/rawnand.h | 24 ++++----- 2 files changed, 54 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index cca4b24d2ffa..96cadead262e 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -278,11 +278,8 @@ EXPORT_SYMBOL_GPL(nand_deselect_target); static void nand_release_device(struct nand_chip *chip) { /* Release the controller and the chip */ - spin_lock(&chip->controller->lock); - chip->controller->active = NULL; - chip->state = FL_READY; - wake_up(&chip->controller->wq); - spin_unlock(&chip->controller->lock); + mutex_unlock(&chip->controller->lock); + mutex_unlock(&chip->lock); } /** @@ -330,58 +327,24 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs) return nand_block_bad(chip, ofs); } -/** - * panic_nand_get_device - [GENERIC] Get chip for selected access - * @chip: the nand chip descriptor - * @new_state: the state which is requested - * - * Used when in panic, no locks are taken. - */ -static void panic_nand_get_device(struct nand_chip *chip, int new_state) -{ - /* Hardware controller shared among independent devices */ - chip->controller->active = chip; - chip->state = new_state; -} - /** * nand_get_device - [GENERIC] Get chip for selected access * @chip: NAND chip structure - * @new_state: the state which is requested * - * Get the device and lock it for exclusive access + * Lock the device and its controller for exclusive access + * + * Return: -EBUSY if the chip has been suspended, 0 otherwise */ -static int -nand_get_device(struct nand_chip *chip, int new_state) +static int nand_get_device(struct nand_chip *chip) { - spinlock_t *lock = &chip->controller->lock; - wait_queue_head_t *wq = &chip->controller->wq; - DECLARE_WAITQUEUE(wait, current); -retry: - spin_lock(lock); - - /* Hardware controller shared among independent devices */ - if (!chip->controller->active) - chip->controller->active = chip; - - if (chip->controller->active == chip && chip->state == FL_READY) { - chip->state = new_state; - spin_unlock(lock); - return 0; - } - if (new_state == FL_PM_SUSPENDED) { - if (chip->controller->active->state == FL_PM_SUSPENDED) { - chip->state = FL_PM_SUSPENDED; - spin_unlock(lock); - return 0; - } + mutex_lock(&chip->lock); + if (chip->suspended) { + mutex_unlock(&chip->lock); + return -EBUSY; } - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(wq, &wait); - spin_unlock(lock); - schedule(); - remove_wait_queue(wq, &wait); - goto retry; + mutex_lock(&chip->controller->lock); + + return 0; } /** @@ -602,7 +565,10 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs) nand_erase_nand(chip, &einfo, 0); /* Write bad block marker to OOB */ - nand_get_device(chip, FL_WRITING); + ret = nand_get_device(chip); + if (ret) + return ret; + ret = nand_markbad_bbm(chip, ofs); nand_release_device(chip); } @@ -3580,7 +3546,9 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, ops->mode != MTD_OPS_RAW) return -ENOTSUPP; - nand_get_device(chip, FL_READING); + ret = nand_get_device(chip); + if (ret) + return ret; if (!ops->datbuf) ret = nand_do_read_oob(chip, from, ops); @@ -4099,9 +4067,6 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len, struct mtd_oob_ops ops; int ret; - /* Grab the device */ - panic_nand_get_device(chip, FL_WRITING); - nand_select_target(chip, chipnr); /* Wait for the device to get ready */ @@ -4132,7 +4097,9 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to, ops->retlen = 0; - nand_get_device(chip, FL_WRITING); + ret = nand_get_device(chip); + if (ret) + return ret; switch (ops->mode) { case MTD_OPS_PLACE_OOB: @@ -4205,7 +4172,9 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, return -EINVAL; /* Grab the lock and see if the device is available */ - nand_get_device(chip, FL_ERASING); + ret = nand_get_device(chip); + if (ret) + return ret; /* Shift to get first page */ page = (int)(instr->addr >> chip->page_shift); @@ -4298,7 +4267,7 @@ static void nand_sync(struct mtd_info *mtd) pr_debug("%s: called\n", __func__); /* Grab the lock and see if the device is available */ - nand_get_device(chip, FL_SYNCING); + WARN_ON(nand_get_device(chip)); /* Release it and go back */ nand_release_device(chip); } @@ -4315,7 +4284,10 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) int ret; /* Select the NAND device */ - nand_get_device(chip, FL_READING); + ret = nand_get_device(chip); + if (ret) + return ret; + nand_select_target(chip, chipnr); ret = nand_block_checkbad(chip, offs, 0); @@ -4388,7 +4360,13 @@ static int nand_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len) */ static int nand_suspend(struct mtd_info *mtd) { - return nand_get_device(mtd_to_nand(mtd), FL_PM_SUSPENDED); + struct nand_chip *chip = mtd_to_nand(mtd); + + mutex_lock(&chip->lock); + chip->suspended = 1; + mutex_unlock(&chip->lock); + + return 0; } /** @@ -4399,11 +4377,13 @@ static void nand_resume(struct mtd_info *mtd) { struct nand_chip *chip = mtd_to_nand(mtd); - if (chip->state == FL_PM_SUSPENDED) - nand_release_device(chip); + mutex_lock(&chip->lock); + if (chip->suspended) + chip->suspended = 0; else pr_err("%s called for a chip which is not in suspended state\n", __func__); + mutex_unlock(&chip->lock); } /** @@ -4413,7 +4393,7 @@ static void nand_resume(struct mtd_info *mtd) */ static void nand_shutdown(struct mtd_info *mtd) { - nand_get_device(mtd_to_nand(mtd), FL_PM_SUSPENDED); + nand_suspend(mtd); } /* Set default functions */ @@ -5018,6 +4998,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, /* Assume all dies are deselected when we enter nand_scan_ident(). */ chip->cur_cs = -1; + mutex_init(&chip->lock); + /* Enforce the right timings for reset/detection */ onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0); @@ -5717,9 +5699,6 @@ static int nand_scan_tail(struct nand_chip *chip) } chip->subpagesize = mtd->writesize >> mtd->subpage_sft; - /* Initialize state */ - chip->state = FL_READY; - /* Invalidate the pagebuffer reference */ chip->pagebuf = -1; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 33e240acdc6d..17d2d9ae33bf 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -16,13 +16,12 @@ #ifndef __LINUX_MTD_RAWNAND_H #define __LINUX_MTD_RAWNAND_H -#include -#include #include #include #include #include #include +#include #include #include @@ -897,25 +896,17 @@ struct nand_controller_ops { /** * struct nand_controller - Structure used to describe a NAND controller * - * @lock: protection lock - * @active: the mtd device which holds the controller currently - * @wq: wait queue to sleep on if a NAND operation is in - * progress used instead of the per chip wait queue - * when a hw controller is available. + * @lock: lock used to serialize accesses to the NAND controller * @ops: NAND controller operations. */ struct nand_controller { - spinlock_t lock; - struct nand_chip *active; - wait_queue_head_t wq; + struct mutex lock; const struct nand_controller_ops *ops; }; static inline void nand_controller_init(struct nand_controller *nfc) { - nfc->active = NULL; - spin_lock_init(&nfc->lock); - init_waitqueue_head(&nfc->wq); + mutex_init(&nfc->lock); } /** @@ -983,7 +974,6 @@ struct nand_legacy { * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buf_align: minimum buffer alignment required by a platform - * @state: [INTERN] the current state of the NAND device * @oob_poi: "poison value buffer," used for laying out OOB data * before writing * @page_shift: [INTERN] number of address bits in a page (column @@ -1034,6 +1024,9 @@ struct nand_legacy { * cur_cs < numchips. NAND Controller drivers should not * modify this value, but they're allowed to read it. * @read_retries: [INTERN] the number of read retry modes supported + * @lock: lock protecting the suspended field. Also used to + * serialize accesses to the NAND device. + * @suspended: set to 1 when the device is suspended, 0 when it's not. * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -1088,7 +1081,8 @@ struct nand_chip { int read_retries; - flstate_t state; + struct mutex lock; + unsigned int suspended : 1; uint8_t *oob_poi; struct nand_controller *controller; -- cgit v1.2.3 From 2d73f3d66b7052c0175f9f33d271ae50826c222e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 21 Jan 2019 15:32:07 +0900 Subject: mtd: rawnand: remove ->legacy.erase and single_erase() Now that the last user of this hook, denali.c, stopped using it, we can remove the erase hook from nand_legacy. I squashed single_erase() because only the difference between single_erase() and nand_erase_op() is the number of bit shifts. The status/ret conversion in nand_erase_nand() is unneeded since commit eb94555e9e97 ("mtd: nand: use usual return values for the ->erase() hook"). Cleaned it up now. Signed-off-by: Masahiro Yamada Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 31 ++++--------------------------- include/linux/mtd/rawnand.h | 2 -- 2 files changed, 4 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index e05ecf2e4269..cf207d6e7a81 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4121,23 +4121,6 @@ out: return ret; } -/** - * single_erase - [GENERIC] NAND standard block erase command function - * @chip: NAND chip object - * @page: the page address of the block which will be erased - * - * Standard erase command for NAND chips. Returns NAND status. - */ -static int single_erase(struct nand_chip *chip, int page) -{ - unsigned int eraseblock; - - /* Send commands to erase a block */ - eraseblock = page >> (chip->phys_erase_shift - chip->page_shift); - - return nand_erase_op(chip, eraseblock); -} - /** * nand_erase - [MTD Interface] erase block(s) * @mtd: MTD device structure @@ -4161,7 +4144,7 @@ static int nand_erase(struct mtd_info *mtd, struct erase_info *instr) int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, int allowbbt) { - int page, status, pages_per_block, ret, chipnr; + int page, pages_per_block, ret, chipnr; loff_t len; pr_debug("%s: start = 0x%012llx, len = %llu\n", @@ -4215,17 +4198,11 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, (page + pages_per_block)) chip->pagebuf = -1; - if (chip->legacy.erase) - status = chip->legacy.erase(chip, - page & chip->pagemask); - else - status = single_erase(chip, page & chip->pagemask); - - /* See if block erase succeeded */ - if (status) { + ret = nand_erase_op(chip, (page & chip->pagemask) >> + (chip->phys_erase_shift - chip->page_shift)); + if (ret) { pr_debug("%s: failed erase, page 0x%08x\n", __func__, page); - ret = -EIO; instr->fail_addr = ((loff_t)page << chip->page_shift); goto erase_exit; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 17d2d9ae33bf..b7445a44a814 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -927,7 +927,6 @@ static inline void nand_controller_init(struct nand_controller *nfc) * @waitfunc: hardware specific function for wait on ready. * @block_bad: check if a block is bad, using OOB markers * @block_markbad: mark a block bad - * @erase: erase function * @set_features: set the NAND chip features * @get_features: get the NAND chip features * @chip_delay: chip dependent delay for transferring data from array to read @@ -953,7 +952,6 @@ struct nand_legacy { int (*waitfunc)(struct nand_chip *chip); int (*block_bad)(struct nand_chip *chip, loff_t ofs); int (*block_markbad)(struct nand_chip *chip, loff_t ofs); - int (*erase)(struct nand_chip *chip, int page); int (*set_features)(struct nand_chip *chip, int feature_addr, u8 *subfeature_para); int (*get_features)(struct nand_chip *chip, int feature_addr, -- cgit v1.2.3 From 278bca7f318e6a29f482eabbca52db538dc5d4e6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jan 2019 21:00:27 +0200 Subject: vfio-mdev: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Cc: Kirti Wankhede Cc: Alex Williamson Signed-off-by: Andy Shevchenko Reviewed-by: Christoph Hellwig Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_core.c | 16 ++++++++-------- drivers/vfio/mdev/mdev_private.h | 5 +++-- drivers/vfio/mdev/mdev_sysfs.c | 6 +++--- include/linux/mdev.h | 2 +- samples/vfio-mdev/mtty.c | 8 ++++---- 5 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 0212f0ee8aea..b96fedc77ee5 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -60,9 +60,9 @@ struct mdev_device *mdev_from_dev(struct device *dev) } EXPORT_SYMBOL(mdev_from_dev); -uuid_le mdev_uuid(struct mdev_device *mdev) +const guid_t *mdev_uuid(struct mdev_device *mdev) { - return mdev->uuid; + return &mdev->uuid; } EXPORT_SYMBOL(mdev_uuid); @@ -88,8 +88,7 @@ static void mdev_release_parent(struct kref *kref) put_device(dev); } -static -inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) +static inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) { if (parent) kref_get(&parent->ref); @@ -276,7 +275,8 @@ static void mdev_device_release(struct device *dev) kfree(mdev); } -int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) +int mdev_device_create(struct kobject *kobj, + struct device *dev, const guid_t *uuid) { int ret; struct mdev_device *mdev, *tmp; @@ -291,7 +291,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) /* Check for duplicate */ list_for_each_entry(tmp, &mdev_list, next) { - if (!uuid_le_cmp(tmp->uuid, uuid)) { + if (guid_equal(&tmp->uuid, uuid)) { mutex_unlock(&mdev_list_lock); ret = -EEXIST; goto mdev_fail; @@ -305,7 +305,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) goto mdev_fail; } - memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); + guid_copy(&mdev->uuid, uuid); list_add(&mdev->next, &mdev_list); mutex_unlock(&mdev_list_lock); @@ -315,7 +315,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) mdev->dev.parent = dev; mdev->dev.bus = &mdev_bus_type; mdev->dev.release = mdev_device_release; - dev_set_name(&mdev->dev, "%pUl", uuid.b); + dev_set_name(&mdev->dev, "%pUl", uuid); ret = device_register(&mdev->dev); if (ret) { diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index b5819b7d7ef7..379758c52b1b 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -28,7 +28,7 @@ struct mdev_parent { struct mdev_device { struct device dev; struct mdev_parent *parent; - uuid_le uuid; + guid_t uuid; void *driver_data; struct kref ref; struct list_head next; @@ -58,7 +58,8 @@ void parent_remove_sysfs_files(struct mdev_parent *parent); int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type); void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type); -int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid); +int mdev_device_create(struct kobject *kobj, + struct device *dev, const guid_t *uuid); int mdev_device_remove(struct device *dev, bool force_remove); #endif /* MDEV_PRIVATE_H */ diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index ce5dd219f2c8..5193a0e0ce5a 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -55,7 +55,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev, const char *buf, size_t count) { char *str; - uuid_le uuid; + guid_t uuid; int ret; if ((count < UUID_STRING_LEN) || (count > UUID_STRING_LEN + 1)) @@ -65,12 +65,12 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev, if (!str) return -ENOMEM; - ret = uuid_le_to_bin(str, &uuid); + ret = guid_parse(str, &uuid); kfree(str); if (ret) return ret; - ret = mdev_device_create(kobj, dev, uuid); + ret = mdev_device_create(kobj, dev, &uuid); if (ret) return ret; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index b6e048e1045f..d7aee90e5da5 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -120,7 +120,7 @@ struct mdev_driver { extern void *mdev_get_drvdata(struct mdev_device *mdev); extern void mdev_set_drvdata(struct mdev_device *mdev, void *data); -extern uuid_le mdev_uuid(struct mdev_device *mdev); +extern const guid_t *mdev_uuid(struct mdev_device *mdev); extern struct bus_type mdev_bus_type; diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index f6732aa16bb1..19cd29071ab0 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -156,15 +156,15 @@ static const struct file_operations vd_fops = { /* function prototypes */ -static int mtty_trigger_interrupt(uuid_le uuid); +static int mtty_trigger_interrupt(const guid_t *uuid); /* Helper functions */ -static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) +static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid) { struct mdev_state *mds; list_for_each_entry(mds, &mdev_devices_list, next) { - if (uuid_le_cmp(mdev_uuid(mds->mdev), uuid) == 0) + if (guid_equal(mdev_uuid(mds->mdev), uuid)) return mds; } @@ -1032,7 +1032,7 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags, return ret; } -static int mtty_trigger_interrupt(uuid_le uuid) +static int mtty_trigger_interrupt(const guid_t *uuid) { int ret = -1; struct mdev_state *mdev_state; -- cgit v1.2.3 From 972248e9111ee6fe9fb56c24ecfd7434f3d713ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Jan 2019 09:32:03 +0100 Subject: scsi: bsg-lib: handle bidi requests without block layer help We can just stash away the second request in struct bsg_job instead of using the block layer req->next_rq field, allowing for the eventual removal of the latter. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/bsg-lib.c | 44 +++++++++++++++++++++---- block/bsg.c | 68 ++++++++------------------------------- drivers/scsi/scsi_transport_sas.c | 1 - include/linux/bsg-lib.h | 4 +++ 4 files changed, 56 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 192129856342..005e2b75d775 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -51,11 +51,40 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, fmode_t mode) { struct bsg_job *job = blk_mq_rq_to_pdu(rq); + int ret; job->request_len = hdr->request_len; job->request = memdup_user(uptr64(hdr->request), hdr->request_len); + if (IS_ERR(job->request)) + return PTR_ERR(job->request); + + if (hdr->dout_xfer_len && hdr->din_xfer_len) { + job->bidi_rq = blk_get_request(rq->q, REQ_OP_SCSI_IN, 0); + if (IS_ERR(job->bidi_rq)) { + ret = PTR_ERR(job->bidi_rq); + goto out; + } + + ret = blk_rq_map_user(rq->q, job->bidi_rq, NULL, + uptr64(hdr->din_xferp), hdr->din_xfer_len, + GFP_KERNEL); + if (ret) + goto out_free_bidi_rq; + + job->bidi_bio = job->bidi_rq->bio; + } else { + job->bidi_rq = NULL; + job->bidi_bio = NULL; + } - return PTR_ERR_OR_ZERO(job->request); + return 0; + +out_free_bidi_rq: + if (job->bidi_rq) + blk_put_request(job->bidi_rq); +out: + kfree(job->request); + return ret; } static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr) @@ -93,7 +122,7 @@ static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr) /* we assume all request payload was transferred, residual == 0 */ hdr->dout_resid = 0; - if (rq->next_rq) { + if (job->bidi_rq) { unsigned int rsp_len = job->reply_payload.payload_len; if (WARN_ON(job->reply_payload_rcv_len > rsp_len)) @@ -111,6 +140,11 @@ static void bsg_transport_free_rq(struct request *rq) { struct bsg_job *job = blk_mq_rq_to_pdu(rq); + if (job->bidi_rq) { + blk_rq_unmap_user(job->bidi_bio); + blk_put_request(job->bidi_rq); + } + kfree(job->request); } @@ -200,7 +234,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) */ static bool bsg_prepare_job(struct device *dev, struct request *req) { - struct request *rsp = req->next_rq; struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; @@ -211,8 +244,8 @@ static bool bsg_prepare_job(struct device *dev, struct request *req) if (ret) goto failjob_rls_job; } - if (rsp && rsp->bio) { - ret = bsg_map_buffer(&job->reply_payload, rsp); + if (job->bidi_rq) { + ret = bsg_map_buffer(&job->reply_payload, job->bidi_rq); if (ret) goto failjob_rls_rqst_payload; } @@ -369,7 +402,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, } q->queuedata = dev; - blk_queue_flag_set(QUEUE_FLAG_BIDI, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); ret = bsg_register_queue(q, dev, name, &bsg_transport_ops); diff --git a/block/bsg.c b/block/bsg.c index a799b0ace55c..f306853c6b08 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -74,6 +74,11 @@ static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, { struct scsi_request *sreq = scsi_req(rq); + if (hdr->dout_xfer_len && hdr->din_xfer_len) { + pr_warn_once("BIDI support in bsg has been removed.\n"); + return -EOPNOTSUPP; + } + sreq->cmd_len = hdr->request_len; if (sreq->cmd_len > BLK_MAX_CDB) { sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL); @@ -114,14 +119,10 @@ static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr) hdr->response_len = len; } - if (rq->next_rq) { - hdr->dout_resid = sreq->resid_len; - hdr->din_resid = scsi_req(rq->next_rq)->resid_len; - } else if (rq_data_dir(rq) == READ) { + if (rq_data_dir(rq) == READ) hdr->din_resid = sreq->resid_len; - } else { + else hdr->dout_resid = sreq->resid_len; - } return ret; } @@ -140,8 +141,8 @@ static const struct bsg_ops bsg_scsi_ops = { static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) { - struct request *rq, *next_rq = NULL; - struct bio *bio, *bidi_bio = NULL; + struct request *rq; + struct bio *bio; struct sg_io_v4 hdr; int ret; @@ -164,7 +165,7 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode); if (ret) - goto out; + return ret; rq->timeout = msecs_to_jiffies(hdr.timeout); if (!rq->timeout) @@ -174,29 +175,6 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) if (rq->timeout < BLK_MIN_SG_TIMEOUT) rq->timeout = BLK_MIN_SG_TIMEOUT; - if (hdr.dout_xfer_len && hdr.din_xfer_len) { - if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { - ret = -EOPNOTSUPP; - goto out; - } - - pr_warn_once( - "BIDI support in bsg has been deprecated and might be removed. " - "Please report your use case to linux-scsi@vger.kernel.org\n"); - - next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); - if (IS_ERR(next_rq)) { - ret = PTR_ERR(next_rq); - goto out; - } - - rq->next_rq = next_rq; - ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr.din_xferp), - hdr.din_xfer_len, GFP_KERNEL); - if (ret) - goto out_free_nextrq; - } - if (hdr.dout_xfer_len) { ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.dout_xferp), hdr.dout_xfer_len, GFP_KERNEL); @@ -206,38 +184,20 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) } if (ret) - goto out_unmap_nextrq; + goto out_free_rq; bio = rq->bio; - if (rq->next_rq) - bidi_bio = rq->next_rq->bio; blk_execute_rq(q, NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL)); ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr); - - if (rq->next_rq) { - blk_rq_unmap_user(bidi_bio); - blk_put_request(rq->next_rq); - } - blk_rq_unmap_user(bio); + +out_free_rq: rq->q->bsg_dev.ops->free_rq(rq); blk_put_request(rq); - - if (copy_to_user(uarg, &hdr, sizeof(hdr))) + if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr))) return -EFAULT; return ret; - -out_unmap_nextrq: - if (rq->next_rq) - blk_rq_unmap_user(rq->next_rq->bio); -out_free_nextrq: - if (rq->next_rq) - blk_put_request(rq->next_rq); -out: - q->bsg_dev.ops->free_rq(rq); - blk_put_request(rq); - return ret; } static struct bsg_device *bsg_alloc_device(void) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 692b46937e52..60f1a81d2034 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -213,7 +213,6 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) to_sas_host_attrs(shost)->q = q; } - blk_queue_flag_set(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b356e0006731..7f14517a559b 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -69,6 +69,10 @@ struct bsg_job { int result; unsigned int reply_payload_rcv_len; + /* BIDI support */ + struct request *bidi_rq; + struct bio *bidi_bio; + void *dd_data; /* Used for driver-specific storage */ }; -- cgit v1.2.3 From 69ed175c195595c73901e18366cb0ebeaeb68b8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 19:35:11 +0100 Subject: scsi: block: remove req->special No users left. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/blk-mq.c | 1 - drivers/scsi/sd.c | 2 -- include/linux/blkdev.h | 2 -- 3 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ba37b9e15e9..502cbf964a3b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -331,7 +331,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; #endif - rq->special = NULL; /* tag was already set */ rq->extra_len = 0; WRITE_ONCE(rq->deadline, 0); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3db9b1fe7516..c124459041dc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1171,8 +1171,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) if (ret != BLK_STS_OK) return ret; - WARN_ON_ONCE(cmd != rq->special); - if (!scsi_device_online(sdp) || sdp->changed) { scmd_printk(KERN_ERR, cmd, "device offline or changed\n"); return BLK_STS_IOERR; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 338604dff7d0..fd1450d53f1c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,8 +216,6 @@ struct request { unsigned short write_hint; unsigned short ioprio; - void *special; /* opaque pointer available for LLD use */ - unsigned int extra_len; /* length of alignment and padding */ enum mq_rq_state state; -- cgit v1.2.3 From 8b3238cabd50e2715b6544e724e74685209b190a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 08:01:10 -0800 Subject: scsi: block: remove bidi support Unused now, and another field in struct request bites the dust. Signed-off-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Martin K. Petersen --- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 3 --- include/linux/blkdev.h | 6 ------ 3 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 90d68760af08..ac832547160a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -115,7 +115,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m) static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STOPPED), QUEUE_FLAG_NAME(DYING), - QUEUE_FLAG_NAME(BIDI), QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), QUEUE_FLAG_NAME(FAIL_IO), diff --git a/block/blk-mq.c b/block/blk-mq.c index 502cbf964a3b..820d131a6893 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -339,7 +339,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->end_io = NULL; rq->end_io_data = NULL; - rq->next_rq = NULL; data->ctx->rq_dispatched[op_is_sync(op)]++; refcount_set(&rq->ref, 1); @@ -549,8 +548,6 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) rq_qos_done(rq->q, rq); rq->end_io(rq, error); } else { - if (unlikely(blk_bidi_rq(rq))) - blk_mq_free_request(rq->next_rq); blk_mq_free_request(rq); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fd1450d53f1c..21beb456b97a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -234,9 +234,6 @@ struct request { */ rq_end_io_fn *end_io; void *end_io_data; - - /* for bidi */ - struct request *next_rq; }; static inline bool blk_op_is_scsi(unsigned int op) @@ -572,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ @@ -644,8 +640,6 @@ static inline bool blk_account_rq(struct request *rq) return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) - #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -- cgit v1.2.3 From 5870970b9a828d8693aa6d15742573289d7dbcd0 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:58:39 +0000 Subject: arm64: Fix HCR.TGE status for NMI contexts When using VHE, the host needs to clear HCR_EL2.TGE bit in order to interact with guest TLBs, switching from EL2&0 translation regime to EL1&0. However, some non-maskable asynchronous event could happen while TGE is cleared like SDEI. Because of this address translation operations relying on EL2&0 translation regime could fail (tlb invalidation, userspace access, ...). Fix this by properly setting HCR_EL2.TGE when entering NMI context and clear it if necessary when returning to the interrupted context. Signed-off-by: Julien Thierry Suggested-by: Marc Zyngier Reviewed-by: Marc Zyngier Reviewed-by: James Morse Cc: Arnd Bergmann Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hardirq.h | 31 +++++++++++++++++++++++++++++++ arch/arm64/kernel/irq.c | 3 +++ include/linux/hardirq.h | 7 +++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 1473fc2f7ab7..89691c86640a 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -17,8 +17,12 @@ #define __ASM_HARDIRQ_H #include +#include #include +#include #include +#include +#include #define NR_IPI 7 @@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu); #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +struct nmi_ctx { + u64 hcr; +}; + +DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); + +#define arch_nmi_enter() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + nmi_ctx->hcr = read_sysreg(hcr_el2); \ + if (!(nmi_ctx->hcr & HCR_TGE)) { \ + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + } \ + } while (0) + +#define arch_nmi_exit() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + if (!(nmi_ctx->hcr & HCR_TGE)) \ + write_sysreg(nmi_ctx->hcr, hcr_el2); \ + } \ + } while (0) + static inline void ack_bad_irq(unsigned int irq) { extern unsigned long irq_err_count; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 780a12f59a8f..92fa81798fb9 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,6 +33,9 @@ unsigned long irq_err_count; +/* Only access this in an NMI enter/exit */ +DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); + DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0fbbcdf0c178..da0af631ded5 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -60,8 +60,14 @@ extern void irq_enter(void); */ extern void irq_exit(void); +#ifndef arch_nmi_enter +#define arch_nmi_enter() do { } while (0) +#define arch_nmi_exit() do { } while (0) +#endif + #define nmi_enter() \ do { \ + arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ @@ -80,6 +86,7 @@ extern void irq_exit(void); ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ + arch_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From 13b210ddf474d9f3368766008a89fe82a6f90b48 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 31 Jan 2019 14:58:49 +0000 Subject: efi: Let architectures decide the flags that should be saved/restored Currently, irqflags are saved before calling runtime services and checked for mismatch on return. Provide a pair of overridable macros to save and restore (if needed) the state that need to be preserved on return from a runtime service. This allows to check for flags that are not necesarly related to irqflags. Signed-off-by: Julien Thierry Acked-by: Catalin Marinas Acked-by: Ard Biesheuvel Acked-by: Marc Zyngier Cc: Ard Biesheuvel Cc: linux-efi@vger.kernel.org Signed-off-by: Catalin Marinas --- drivers/firmware/efi/runtime-wrappers.c | 17 +++++++++++++++-- include/linux/efi.h | 5 +++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 8903b9ccfc2b..c70df5ae7c4a 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -89,11 +89,24 @@ exit: \ efi_rts_work.status; \ }) +#ifndef arch_efi_save_flags +#define arch_efi_save_flags(state_flags) local_save_flags(state_flags) +#define arch_efi_restore_flags(state_flags) local_irq_restore(state_flags) +#endif + +unsigned long efi_call_virt_save_flags(void) +{ + unsigned long flags; + + arch_efi_save_flags(flags); + return flags; +} + void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; - local_save_flags(cur_flags); + cur_flags = efi_call_virt_save_flags(); mismatch = flags ^ cur_flags; if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK)) @@ -102,7 +115,7 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE); pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n", flags, cur_flags, call); - local_irq_restore(flags); + arch_efi_restore_flags(flags); } /* diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..bd80b7ec35db 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1607,6 +1607,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, bool efi_runtime_disabled(void); extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +extern unsigned long efi_call_virt_save_flags(void); enum efi_secureboot_mode { efi_secureboot_mode_unset, @@ -1652,7 +1653,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ __s = arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ @@ -1667,7 +1668,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); \ arch_efi_call_virt_setup(); \ \ - local_save_flags(__flags); \ + __flags = efi_call_virt_save_flags(); \ arch_efi_call_virt(p, f, args); \ efi_call_virt_check_flags(__flags, __stringify(f)); \ \ -- cgit v1.2.3 From 840018668ce2d96783356204ff282d6c9b0e5f66 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:08 -0700 Subject: perf/aux: Make perf_event accessible to setup_aux() When pmu::setup_aux() is called the coresight PMU needs to know which sink to use for the session by looking up the information in the event's attr::config2 field. As such simply replace the cpu information by the complete perf_event structure and change all affected customers. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki Poulouse Acked-by: Peter Zijlstra Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/s390/kernel/perf_cpum_sf.c | 6 +++--- arch/x86/events/intel/bts.c | 4 +++- arch/x86/events/intel/pt.c | 5 +++-- drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++--- drivers/perf/arm_spe_pmu.c | 6 +++--- include/linux/perf_event.h | 2 +- kernel/events/ring_buffer.c | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bfabeb1889cc..1266194afb02 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb) /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling - * @cpu: On which to allocate, -1 means current + * @event: Event the buffer is setup for, event->cpu == -1 means current * @pages: Array of pointers to buffer pages passed from perf core * @nr_pages: Total pages * @snapshot: Flag for snapshot mode @@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb) * * Return the private AUX buffer structure if success or NULL if fails. */ -static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct sf_buffer *sfb; struct aux_buffer *aux; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index a01ef1b0f883..7cdd7b13bbda 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -77,10 +77,12 @@ static size_t buf_size(struct page *page) } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 9494ca68fd9d..c0e86ff21f81 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. */ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index abe8249b893b..f21eb28b6782 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -177,15 +177,15 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 8e46a9dad2fa..7cb766dafe85 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event) { } -static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { - int i; + int i, cpu = event->cpu; struct page **pglist; struct arm_spe_pmu_buf *buf; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6cb5d483ab34..d9c3610e0e25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -410,7 +410,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 805f0423ee0b..70ae2422cbaf 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -657,7 +657,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; -- cgit v1.2.3 From bb8e370bdc141ddff526e5e5ee74210c91fee0b8 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 31 Jan 2019 11:47:09 -0700 Subject: coresight: perf: Add "sinks" group to PMU directory Add a "sinks" directory entry so that users can see all the sinks available in the system in a single place. Individual sink are added as they are registered with the coresight bus. Committer tests: Test built on a ubuntu 18.04 container with a cross build environment to arm64, the new field is there, need to find a machine with this feature to do further testing in the future. root@d15263e5734a:/git/perf# grep CORESIGHT /tmp/build/v5.0-rc2+/.config CONFIG_CORESIGHT=y CONFIG_CORESIGHT_LINKS_AND_SINKS=y CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y CONFIG_CORESIGHT_CATU=y CONFIG_CORESIGHT_SINK_TPIU=y CONFIG_CORESIGHT_SINK_ETBV10=y CONFIG_CORESIGHT_SOURCE_ETM4X=y CONFIG_CORESIGHT_DYNAMIC_REPLICATOR=y CONFIG_CORESIGHT_STM=y CONFIG_CORESIGHT_CPU_DEBUG=m root@d15263e5734a:/git/perf# root@d15263e5734a:/git/perf# file /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/*.o .../coresight/coresight-catu.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-cpu-debug.mod.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-cpu-debug.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-dynamic-replicator.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etb10.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm-perf.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm4x-sysfs.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-etm4x.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-funnel.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-replicator.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-stm.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc-etf.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc-etr.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tmc.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight-tpiu.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/coresight.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped .../coresight/of_coresight.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped root@d15263e5734a:/git/perf# root@d15263e5734a:/git/perf# pahole -C coresight_device /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/coresight.o struct coresight_device { struct coresight_connection * conns; /* 0 8 */ int nr_inport; /* 8 4 */ int nr_outport; /* 12 4 */ enum coresight_dev_type type; /* 16 4 */ union coresight_dev_subtype subtype; /* 20 8 */ /* XXX 4 bytes hole, try to pack */ const struct coresight_ops * ops; /* 32 8 */ struct device dev; /* 40 1408 */ /* XXX last struct has 7 bytes of padding */ /* --- cacheline 22 boundary (1408 bytes) was 40 bytes ago --- */ atomic_t * refcnt; /* 1448 8 */ bool orphan; /* 1456 1 */ bool enable; /* 1457 1 */ bool activated; /* 1458 1 */ /* XXX 5 bytes hole, try to pack */ struct dev_ext_attribute * ea; /* 1464 8 */ /* size: 1472, cachelines: 23, members: 12 */ /* sum members: 1463, holes: 2, sum holes: 9 */ /* paddings: 1, sum paddings: 7 */ }; root@d15263e5734a:/git/perf# Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Acked-by: Peter Zijlstra Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/20190131184714.20388-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- drivers/hwtracing/coresight/coresight-etm-perf.c | 82 ++++++++++++++++++++++++ drivers/hwtracing/coresight/coresight-etm-perf.h | 6 +- drivers/hwtracing/coresight/coresight.c | 18 ++++++ include/linux/coresight.h | 7 +- 4 files changed, 110 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f21eb28b6782..cdbdb28dc175 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -43,8 +44,18 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; +static struct attribute *etm_config_sinks_attr[] = { + NULL, +}; + +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, + &etm_pmu_sinks_group, NULL, }; @@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *pdev = csdev->dev.parent; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(pdev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index da7d9336a15c..015213abe00a 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -59,6 +59,8 @@ struct etm_event_data { #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { struct etm_event_data *data = perf_get_aux(handle); @@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle) #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } - +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { return NULL; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 2b0df1a0a8df..d7fa90be6f42 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -18,6 +18,7 @@ #include #include +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -1167,6 +1168,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) goto err_out; } + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). + */ + goto err_out; + } + } + mutex_lock(&coresight_mutex); coresight_fixup_device_conns(csdev); @@ -1185,6 +1202,7 @@ EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 46c67a764877..7b87965f7a65 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -154,8 +154,9 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { struct coresight_connection *conns; @@ -168,7 +169,9 @@ struct coresight_device { atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) -- cgit v1.2.3 From 5f02a877638472e83cb5e335f9eec27052b1c7c2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:28 +0200 Subject: fsnotify: annotate directory entry modification events "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events should always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. fsnotify_nameremove() and fsnotify_move() were modified to no longer set the FS_EVENT_ON_CHILD event bit. This is a semantic change to align with the "dirent" event definition. It has no effect on any existing backend, because dnotify, inotify and audit always requets the child events and fanotify does not get the delete,rename events. The fsnotify_dirent() helper is used instead of fsnotify_parent() to report a dirent event to dentry->d_parent without FS_EVENT_ON_CHILD and regardless if parent has the FS_EVENT_ON_CHILD bit set. Unlike fsnotify_parent(), fsnotify_dirent() assumes that dentry->d_name and dentry->d_parent are stable. For fsnotify_create()/fsnotify_mkdir(), this assumption is abviously correct. For fsnotify_nameremove(), it is less trivial, so we use dget_parent() and take_dentry_name_snapshot() to grab stable references. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 2ccb08cb5d6a..39b22e88423d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,8 +17,22 @@ #include #include +/* + * Notify this @dir inode about a change in the directory entry @dentry. + * + * Unlike fsnotify_parent(), the event will be reported regardless of the + * FS_EVENT_ON_CHILD mask on the parent inode. + */ +static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); +} + /* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(const struct path *path, + struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; @@ -85,8 +99,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); - __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); + __u32 old_dir_mask = FS_MOVED_FROM; + __u32 new_dir_mask = FS_MOVED_TO; const unsigned char *new_name = moved->d_name.name; if (old_dir == new_dir) @@ -128,15 +142,35 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) /* * fsnotify_nameremove - a filename was removed from a directory + * + * This is mostly called under parent vfs inode lock so name and + * dentry->d_parent should be stable. However there are some corner cases where + * inode lock is not held. So to be on the safe side and be reselient to future + * callers and out of tree users of d_delete(), we do not assume that d_parent + * and d_name are stable and we use dget_parent() and + * take_dentry_name_snapshot() to grab stable references. */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + struct dentry *parent; + struct name_snapshot name; __u32 mask = FS_DELETE; + /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ + if (IS_ROOT(dentry)) + return; + if (isdir) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + parent = dget_parent(dentry); + take_dentry_name_snapshot(&name, dentry); + + fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + name.name, 0); + + release_dentry_name_snapshot(&name); + dput(parent); } /* @@ -155,7 +189,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE); } /* @@ -176,12 +210,9 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_ISDIR); - struct inode *d_inode = dentry->d_inode; - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } /* -- cgit v1.2.3 From e220140ff6241e180d0c2fc294e61ee6bbc6a18e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:29 +0200 Subject: fsnotify: remove dirent events from FS_EVENTS_POSS_ON_CHILD mask "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events are always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. ALL_FSNOTIFY_DIRENT_EVENTS defines all the dirent event types and those event types are removed from FS_EVENTS_POSS_ON_CHILD. That means for a directory with an inotify watch and only dirent events in the mask (i.e. create,delete,move), all children dentries will no longer have the DCACHE_FSNOTIFY_PARENT_WATCHED flag set. This will allow all events that happen on children to be optimized away in __fsnotify_parent() without the need to dereference child->d_parent->d_inode->i_fsnotify_mask. Since the dirent events are never repoted via __fsnotify_parent(), this results in no change of logic, but only an optimization. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7639774e7475..7f195d43efaf 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -59,27 +59,33 @@ * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 -/* This is a list of all events that may get sent to a parernt based on fs event - * happening to inodes inside that directory */ -#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) - #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +/* + * Directory entry modification events - reported only to directory + * where entry is modified and not to a watching parent. + * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event + * when a directory entry inside a child subdir changes. + */ +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) + #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) +/* + * This is a list of all events that may get sent to a parent based on fs event + * happening to inodes inside that directory. + */ +#define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ + FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ + FS_OPEN | FS_OPEN_EXEC) + /* Events that can be reported to backends */ -#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ - FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) +#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FS_EVENTS_POSS_ON_CHILD | \ + FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ -- cgit v1.2.3 From a0a92d261f2922f4b5d2c0a98d6c41a89c7f5edd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:31 +0200 Subject: fsnotify: move mask out of struct fsnotify_event Common fsnotify_event helpers have no need for the mask field. It is only used by backend code, so move the field out of the abstract fsnotify_event struct and into the concrete backend event structs. This change packs struct inotify_event_info better on 64bit machine and will allow us to cram some more fields into struct fanotify_event_info. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 11 +++++++---- fs/notify/fanotify/fanotify.h | 1 + fs/notify/fanotify/fanotify_user.c | 10 +++++----- fs/notify/inotify/inotify.h | 1 + fs/notify/inotify/inotify_fsnotify.c | 9 +++++---- fs/notify/inotify/inotify_user.c | 5 +++-- fs/notify/notification.c | 22 +--------------------- include/linux/fsnotify_backend.h | 10 ++++++---- 8 files changed, 29 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 3723f3d18d20..98197802bbfb 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -36,20 +36,22 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; + struct fanotify_event_info *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. */ - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(new->mask)) return 0; list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { - test_event->mask |= event->mask; + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -173,7 +175,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode, mask); + fsnotify_init_event(&event->fse, inode); + event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); else @@ -280,7 +283,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); put_pid(event->pid); - if (fanotify_is_perm_event(fsn_event->mask)) { + if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index ea05b8a401e7..e630d787d4c3 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -14,6 +14,7 @@ extern struct kmem_cache *fanotify_perm_event_cachep; */ struct fanotify_event_info { struct fsnotify_event fse; + u32 mask; /* * We hold ref to this path so it may be dereferenced at any point * during this object's lifetime diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9c870b0d2b56..dea47d07cc29 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -131,9 +131,9 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata->mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->pid); - if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) + if (unlikely(event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { metadata->fd = create_fd(group, event, file); @@ -230,7 +230,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_metadata.event_len)) goto out_close_fd; - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(FANOTIFY_E(event)->mask)) FANOTIFY_PE(event)->fd = fd; if (fd != FAN_NOFD) @@ -316,7 +316,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(kevent->mask)) { + if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { fsnotify_destroy_event(group, kevent); } else { if (ret <= 0) { @@ -401,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { + if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7e4578d35b61..74ae60305189 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -5,6 +5,7 @@ struct inotify_event_info { struct fsnotify_event fse; + u32 mask; int wd; u32 sync_cookie; int name_len; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f4184b4f3815..fe97299975f2 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn, { struct inotify_event_info *old, *new; - if (old_fsn->mask & FS_IN_IGNORED) - return false; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); - if ((old_fsn->mask == new_fsn->mask) && + if (old->mask & FS_IN_IGNORED) + return false; + if ((old->mask == new->mask) && (old_fsn->inode == new_fsn->inode) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) @@ -114,7 +114,8 @@ int inotify_handle_event(struct fsnotify_group *group, } fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode, mask); + fsnotify_init_event(fsn_event, inode); + event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; event->name_len = len; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 798f1253141a..e2901fbb9f76 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event, NULL); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3c3e36745f59..027d5d5bb90e 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { /* Overflow events are per-group and we don't want to free them */ - if (!event || event->mask == FS_Q_OVERFLOW) + if (!event || event == group->overflow_event) return; /* * If the event is still queued, we have a problem... Do an unreliable @@ -194,23 +194,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group) } spin_unlock(&group->notification_lock); } - -/* - * fsnotify_create_event - Allocate a new event which will be sent to each - * group's handle_event function if the group was interested in this - * particular event. - * - * @inode the inode which is supposed to receive the event (sometimes a - * parent of the inode to which the event happened. - * @mask what actually happened. - * @data pointer to the object which was actually affected - * @data_type flag indication if the data is a file, path, inode, nothing... - * @name the filename, if available - */ -void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, - u32 mask) -{ - INIT_LIST_HEAD(&event->list); - event->inode = inode; - event->mask = mask; -} diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f195d43efaf..1e4b88bd1443 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -135,7 +135,6 @@ struct fsnotify_event { struct list_head list; /* inode may ONLY be dereferenced during handle_event(). */ struct inode *inode; /* either the inode the event happened to or its parent */ - u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; /* @@ -485,9 +484,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -/* put here because inotify does some weird stuff when destroying watches */ -extern void fsnotify_init_event(struct fsnotify_event *event, - struct inode *to_tell, u32 mask); +static inline void fsnotify_init_event(struct fsnotify_event *event, + struct inode *inode) +{ + INIT_LIST_HEAD(&event->list); + event->inode = inode; +} #else -- cgit v1.2.3 From d6cd33ad71029a3f77ba1686caf55d4dea58d916 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 29 Jan 2019 11:31:52 +0100 Subject: regulator: gpio: Convert to use descriptors This converts the GPIO regulator driver to use decriptors only. We have to let go of the array gpio handling: the fetched descriptors are handled individually anyway, and the array retrieveal function does not make it possible to retrieve each GPIO descriptor with unique flags. Instead get them one by one. We request the "enable" GPIO separately as before, and make sure that this line is requested as nonexclusive since enable lines can be shared and the regulator core expects this. Most users of the GPIO regulator are using device tree. There are two boards in the kernel using the gpio regulator from a non-devicetree path: PXA hx4700 and magician. Make sure to switch these over to use descriptors as well. Cc: Philipp Zabel # Magician Cc: Petr Cvek # Magician Cc: Robert Jarzmik # PXA Cc: Paul Parsons # hx4700 Cc: Kevin Hilman # Meson Cc: Neil Armstrong # Meson Tested-by: Marek Szyprowski Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- arch/arm/mach-pxa/hx4700.c | 23 +++-- arch/arm/mach-pxa/magician.c | 23 +++-- drivers/regulator/gpio-regulator.c | 150 ++++++++++++------------------- include/linux/regulator/gpio-regulator.h | 12 +-- 4 files changed, 95 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index b79b757fdd41..51d38d5e776a 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -702,9 +703,7 @@ static struct regulator_init_data bq24022_init_data = { .consumer_supplies = bq24022_consumers, }; -static struct gpio bq24022_gpios[] = { - { GPIO96_HX4700_BQ24022_ISET2, GPIOF_OUT_INIT_LOW, "bq24022_iset2" }, -}; +static enum gpiod_flags bq24022_gpiod_gflags[] = { GPIOD_OUT_LOW }; static struct gpio_regulator_state bq24022_states[] = { { .value = 100000, .gpios = (0 << 0) }, @@ -714,12 +713,10 @@ static struct gpio_regulator_state bq24022_states[] = { static struct gpio_regulator_config bq24022_info = { .supply_name = "bq24022", - .enable_gpio = GPIO72_HX4700_BQ24022_nCHARGE_EN, - .enable_high = 0, .enabled_at_boot = 0, - .gpios = bq24022_gpios, - .nr_gpios = ARRAY_SIZE(bq24022_gpios), + .gflags = bq24022_gpiod_gflags, + .ngpios = ARRAY_SIZE(bq24022_gpiod_gflags), .states = bq24022_states, .nr_states = ARRAY_SIZE(bq24022_states), @@ -736,6 +733,17 @@ static struct platform_device bq24022 = { }, }; +static struct gpiod_lookup_table bq24022_gpiod_table = { + .dev_id = "gpio-regulator", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO96_HX4700_BQ24022_ISET2, + NULL, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO72_HX4700_BQ24022_nCHARGE_EN, + "enable", GPIO_ACTIVE_LOW), + { }, + }, +}; + /* * StrataFlash */ @@ -878,6 +886,7 @@ static void __init hx4700_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + gpiod_add_lookup_table(&bq24022_gpiod_table); platform_add_devices(devices, ARRAY_SIZE(devices)); pwm_add_table(hx4700_pwm_lookup, ARRAY_SIZE(hx4700_pwm_lookup)); diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 08b079653c3f..6538a7c0e504 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -645,9 +645,8 @@ static struct regulator_init_data bq24022_init_data = { .consumer_supplies = bq24022_consumers, }; -static struct gpio bq24022_gpios[] = { - { EGPIO_MAGICIAN_BQ24022_ISET2, GPIOF_OUT_INIT_LOW, "bq24022_iset2" }, -}; + +static enum gpiod_flags bq24022_gpiod_gflags[] = { GPIOD_OUT_LOW }; static struct gpio_regulator_state bq24022_states[] = { { .value = 100000, .gpios = (0 << 0) }, @@ -657,12 +656,10 @@ static struct gpio_regulator_state bq24022_states[] = { static struct gpio_regulator_config bq24022_info = { .supply_name = "bq24022", - .enable_gpio = GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, - .enable_high = 0, .enabled_at_boot = 1, - .gpios = bq24022_gpios, - .nr_gpios = ARRAY_SIZE(bq24022_gpios), + .gflags = bq24022_gpiod_gflags, + .ngpios = ARRAY_SIZE(bq24022_gpiod_gflags), .states = bq24022_states, .nr_states = ARRAY_SIZE(bq24022_states), @@ -679,6 +676,17 @@ static struct platform_device bq24022 = { }, }; +static struct gpiod_lookup_table bq24022_gpiod_table = { + .dev_id = "gpio-regulator", + .table = { + GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2, + NULL, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, + "enable", GPIO_ACTIVE_LOW), + { }, + }, +}; + /* * fixed regulator for ads7846 */ @@ -1027,6 +1035,7 @@ static void __init magician_init(void) regulator_register_always_on(0, "power", pwm_backlight_supply, ARRAY_SIZE(pwm_backlight_supply), 5000000); + gpiod_add_lookup_table(&bq24022_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(devices)); } diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index b2f5ec4f658a..07fb41abd4e8 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -30,16 +30,15 @@ #include #include #include -#include +#include #include #include -#include struct gpio_regulator_data { struct regulator_desc desc; struct regulator_dev *dev; - struct gpio *gpios; + struct gpio_desc **gpiods; int nr_gpios; struct gpio_regulator_state *states; @@ -82,7 +81,7 @@ static int gpio_regulator_set_voltage(struct regulator_dev *dev, for (ptr = 0; ptr < data->nr_gpios; ptr++) { state = (target & (1 << ptr)) >> ptr; - gpio_set_value_cansleep(data->gpios[ptr].gpio, state); + gpiod_set_value_cansleep(data->gpiods[ptr], state); } data->state = target; @@ -119,7 +118,7 @@ static int gpio_regulator_set_current_limit(struct regulator_dev *dev, for (ptr = 0; ptr < data->nr_gpios; ptr++) { state = (target & (1 << ptr)) >> ptr; - gpio_set_value_cansleep(data->gpios[ptr].gpio, state); + gpiod_set_value_cansleep(data->gpiods[ptr], state); } data->state = target; @@ -138,7 +137,8 @@ of_get_gpio_regulator_config(struct device *dev, struct device_node *np, { struct gpio_regulator_config *config; const char *regtype; - int proplen, gpio, i; + int proplen, i; + int ngpios; int ret; config = devm_kzalloc(dev, @@ -153,59 +153,36 @@ of_get_gpio_regulator_config(struct device *dev, struct device_node *np, config->supply_name = config->init_data->constraints.name; - if (of_property_read_bool(np, "enable-active-high")) - config->enable_high = true; - if (of_property_read_bool(np, "enable-at-boot")) config->enabled_at_boot = true; of_property_read_u32(np, "startup-delay-us", &config->startup_delay); - config->enable_gpio = of_get_named_gpio(np, "enable-gpio", 0); - if (config->enable_gpio < 0 && config->enable_gpio != -ENOENT) - return ERR_PTR(config->enable_gpio); - - /* Fetch GPIOs. - optional property*/ - ret = of_gpio_count(np); - if ((ret < 0) && (ret != -ENOENT)) - return ERR_PTR(ret); - - if (ret > 0) { - config->nr_gpios = ret; - config->gpios = devm_kcalloc(dev, - config->nr_gpios, sizeof(struct gpio), - GFP_KERNEL); - if (!config->gpios) + /* Fetch GPIO init levels */ + ngpios = gpiod_count(dev, NULL); + if (ngpios > 0) { + config->gflags = devm_kzalloc(dev, + sizeof(enum gpiod_flags) + * ngpios, + GFP_KERNEL); + if (!config->gflags) return ERR_PTR(-ENOMEM); - proplen = of_property_count_u32_elems(np, "gpios-states"); - /* optional property */ - if (proplen < 0) - proplen = 0; + for (i = 0; i < ngpios; i++) { + u32 val; - if (proplen > 0 && proplen != config->nr_gpios) { - dev_warn(dev, "gpios <-> gpios-states mismatch\n"); - proplen = 0; - } + ret = of_property_read_u32_index(np, "gpios-states", i, + &val); - for (i = 0; i < config->nr_gpios; i++) { - gpio = of_get_named_gpio(np, "gpios", i); - if (gpio < 0) { - if (gpio != -ENOENT) - return ERR_PTR(gpio); - break; - } - config->gpios[i].gpio = gpio; - config->gpios[i].label = config->supply_name; - if (proplen > 0) { - of_property_read_u32_index(np, "gpios-states", - i, &ret); - if (ret) - config->gpios[i].flags = - GPIOF_OUT_INIT_HIGH; - } + /* Default to high per specification */ + if (ret) + config->gflags[i] = GPIOD_OUT_HIGH; + else + config->gflags[i] = + val ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; } } + config->ngpios = ngpios; /* Fetch states. */ proplen = of_property_count_u32_elems(np, "states"); @@ -255,7 +232,8 @@ static int gpio_regulator_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct gpio_regulator_data *drvdata; struct regulator_config cfg = { }; - int ptr, ret, state; + enum gpiod_flags gflags; + int ptr, ret, state, i; drvdata = devm_kzalloc(&pdev->dev, sizeof(struct gpio_regulator_data), GFP_KERNEL); @@ -275,26 +253,21 @@ static int gpio_regulator_probe(struct platform_device *pdev) return -ENOMEM; } - if (config->nr_gpios != 0) { - drvdata->gpios = kmemdup(config->gpios, - config->nr_gpios * sizeof(struct gpio), - GFP_KERNEL); - if (drvdata->gpios == NULL) { - dev_err(&pdev->dev, "Failed to allocate gpio data\n"); - ret = -ENOMEM; - goto err_name; - } - - drvdata->nr_gpios = config->nr_gpios; - ret = gpio_request_array(drvdata->gpios, drvdata->nr_gpios); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "Could not obtain regulator setting GPIOs: %d\n", - ret); - goto err_memgpio; - } + drvdata->gpiods = devm_kzalloc(&pdev->dev, sizeof(struct gpio_desc *), + GFP_KERNEL); + if (!drvdata->gpiods) + return -ENOMEM; + for (i = 0; i < config->ngpios; i++) { + drvdata->gpiods[i] = devm_gpiod_get_index(&pdev->dev, + NULL, + i, + config->gflags[i]); + if (IS_ERR(drvdata->gpiods[i])) + return PTR_ERR(drvdata->gpiods[i]); + /* This is good to know */ + gpiod_set_consumer_name(drvdata->gpiods[i], drvdata->desc.name); } + drvdata->nr_gpios = config->ngpios; drvdata->states = kmemdup(config->states, config->nr_states * @@ -303,7 +276,7 @@ static int gpio_regulator_probe(struct platform_device *pdev) if (drvdata->states == NULL) { dev_err(&pdev->dev, "Failed to allocate state data\n"); ret = -ENOMEM; - goto err_stategpio; + goto err_name; } drvdata->nr_states = config->nr_states; @@ -330,7 +303,7 @@ static int gpio_regulator_probe(struct platform_device *pdev) /* build initial state from gpio init data. */ state = 0; for (ptr = 0; ptr < drvdata->nr_gpios; ptr++) { - if (config->gpios[ptr].flags & GPIOF_OUT_INIT_HIGH) + if (config->gflags[ptr] == GPIOD_OUT_HIGH) state |= (1 << ptr); } drvdata->state = state; @@ -340,21 +313,19 @@ static int gpio_regulator_probe(struct platform_device *pdev) cfg.driver_data = drvdata; cfg.of_node = np; - if (gpio_is_valid(config->enable_gpio)) { - cfg.ena_gpio = config->enable_gpio; - cfg.ena_gpio_initialized = true; - } - cfg.ena_gpio_invert = !config->enable_high; - if (config->enabled_at_boot) { - if (config->enable_high) - cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH; - else - cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW; - } else { - if (config->enable_high) - cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW; - else - cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH; + /* + * The signal will be inverted by the GPIO core if flagged so in the + * decriptor. + */ + if (config->enabled_at_boot) + gflags = GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE; + else + gflags = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE; + + cfg.ena_gpiod = gpiod_get_optional(&pdev->dev, "enable", gflags); + if (IS_ERR(cfg.ena_gpiod)) { + ret = PTR_ERR(cfg.ena_gpiod); + goto err_memstate; } drvdata->dev = regulator_register(&drvdata->desc, &cfg); @@ -370,10 +341,6 @@ static int gpio_regulator_probe(struct platform_device *pdev) err_memstate: kfree(drvdata->states); -err_stategpio: - gpio_free_array(drvdata->gpios, drvdata->nr_gpios); -err_memgpio: - kfree(drvdata->gpios); err_name: kfree(drvdata->desc.name); return ret; @@ -384,12 +351,7 @@ static int gpio_regulator_remove(struct platform_device *pdev) struct gpio_regulator_data *drvdata = platform_get_drvdata(pdev); regulator_unregister(drvdata->dev); - - gpio_free_array(drvdata->gpios, drvdata->nr_gpios); - kfree(drvdata->states); - kfree(drvdata->gpios); - kfree(drvdata->desc.name); return 0; diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h index 19fbd267406d..49c407afb944 100644 --- a/include/linux/regulator/gpio-regulator.h +++ b/include/linux/regulator/gpio-regulator.h @@ -21,6 +21,8 @@ #ifndef __REGULATOR_GPIO_H #define __REGULATOR_GPIO_H +#include + struct regulator_init_data; enum regulator_type; @@ -53,9 +55,9 @@ struct gpio_regulator_state { * This is used to keep the regulator at * the default state * @startup_delay: Start-up time in microseconds - * @gpios: Array containing the gpios needed to control - * the setting of the regulator - * @nr_gpios: Number of gpios + * @gflags: Array of GPIO configuration flags for initial + * states + * @ngpios: Number of GPIOs and configurations available * @states: Array of gpio_regulator_state entries describing * the gpio state for specific voltages * @nr_states: Number of states available @@ -74,8 +76,8 @@ struct gpio_regulator_config { unsigned enabled_at_boot:1; unsigned startup_delay; - struct gpio *gpios; - int nr_gpios; + enum gpiod_flags *gflags; + int ngpios; struct gpio_regulator_state *states; int nr_states; -- cgit v1.2.3 From 01dc79cd6fe7d25b0eba84009634f5435cbdb4e6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 29 Jan 2019 11:31:53 +0100 Subject: regulator: fixed/gpio: Pull inversion/OD into gpiolib This pushes the handling of inversion semantics and open drain settings to the GPIO descriptor and gpiolib. All affected board files are also augmented. This is especially nice since we don't have to have any confusing flags passed around to the left and right littering the fixed and GPIO regulator drivers and the regulator core. It is all just very straight-forward: the core asks the GPIO line to be asserted or deasserted and gpiolib deals with the rest depending on how the platform is configured: if the line is active low, it deals with that, if the line is open drain, it deals with that too. Cc: Alexander Shiyan # i.MX boards user Cc: Haojian Zhuang # MMP2 maintainer Cc: Aaro Koskinen # OMAP1 maintainer Cc: Tony Lindgren # OMAP1,2,3 maintainer Cc: Mike Rapoport # EM-X270 maintainer Cc: Robert Jarzmik # EZX maintainer Cc: Philipp Zabel # Magician maintainer Cc: Petr Cvek # Magician Cc: Robert Jarzmik # PXA Cc: Paul Parsons # hx4700 Cc: Daniel Mack # Raumfeld maintainer Cc: Marc Zyngier # Zeus maintainer Cc: Geert Uytterhoeven # SuperH pinctrl/GPIO maintainer Cc: Russell King # SA1100 Tested-by: Marek Szyprowski Tested-by: Janusz Krzysztofik #OMAP1 Amstrad Delta Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- arch/arm/mach-imx/mach-mx21ads.c | 1 - arch/arm/mach-imx/mach-mx27ads.c | 2 +- arch/arm/mach-mmp/brownstone.c | 1 - arch/arm/mach-omap1/board-ams-delta.c | 2 -- arch/arm/mach-omap2/pdata-quirks.c | 1 - arch/arm/mach-pxa/em-x270.c | 1 - arch/arm/mach-pxa/ezx.c | 3 +- arch/arm/mach-pxa/raumfeld.c | 1 - arch/arm/mach-pxa/zeus.c | 3 +- arch/arm/mach-sa1100/assabet.c | 1 - arch/sh/boards/mach-ecovec24/setup.c | 2 -- .../intel-mid/device_libs/platform_bcm43xx.c | 1 - drivers/regulator/core.c | 8 ++--- drivers/regulator/da9055-regulator.c | 1 - drivers/regulator/fixed.c | 35 +++++----------------- include/linux/regulator/fixed.h | 10 ------- include/linux/regulator/gpio-regulator.h | 6 ---- 17 files changed, 13 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index 2e1e540f2e5a..d278fb672d40 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -205,7 +205,6 @@ static struct regulator_init_data mx21ads_lcd_regulator_init_data = { static struct fixed_voltage_config mx21ads_lcd_regulator_pdata = { .supply_name = "LCD", .microvolts = 3300000, - .enable_high = 1, .init_data = &mx21ads_lcd_regulator_init_data, }; diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c index f5e04047ed13..6dd7f57c332f 100644 --- a/arch/arm/mach-imx/mach-mx27ads.c +++ b/arch/arm/mach-imx/mach-mx27ads.c @@ -237,7 +237,7 @@ static struct fixed_voltage_config mx27ads_lcd_regulator_pdata = { static struct gpiod_lookup_table mx27ads_lcd_regulator_gpiod_table = { .dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */ .table = { - GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c index a04e249c654b..d2560fb1e835 100644 --- a/arch/arm/mach-mmp/brownstone.c +++ b/arch/arm/mach-mmp/brownstone.c @@ -149,7 +149,6 @@ static struct regulator_init_data brownstone_v_5vp_data = { static struct fixed_voltage_config brownstone_v_5vp = { .supply_name = "v_5vp", .microvolts = 5000000, - .enable_high = 1, .enabled_at_boot = 1, .init_data = &brownstone_v_5vp_data, }; diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index c4c0a8ea11e4..be30c3c061b4 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -267,7 +267,6 @@ static struct fixed_voltage_config modem_nreset_config = { .supply_name = "modem_nreset", .microvolts = 3300000, .startup_delay = 25000, - .enable_high = 1, .enabled_at_boot = 1, .init_data = &modem_nreset_data, }; @@ -533,7 +532,6 @@ static struct regulator_init_data keybrd_pwr_initdata = { static struct fixed_voltage_config keybrd_pwr_config = { .supply_name = "keybrd_pwr", .microvolts = 5000000, - .enable_high = 1, .init_data = &keybrd_pwr_initdata, }; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 8a5b6ed4ec36..a2ecc5e69abb 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -330,7 +330,6 @@ static struct fixed_voltage_config pandora_vwlan = { .supply_name = "vwlan", .microvolts = 1800000, /* 1.8V */ .startup_delay = 50000, /* 50ms */ - .enable_high = 1, .init_data = &pandora_vmmc3, }; diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 32c1edeb3f14..5ba7bb7f7d51 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -976,7 +976,6 @@ static struct fixed_voltage_config camera_dummy_config = { .supply_name = "camera_vdd", .input_supply = "vcc cam", .microvolts = 2800000, - .enable_high = 0, .init_data = &camera_dummy_initdata, }; diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index 565965e9acc7..5e110e70ce5a 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -714,7 +714,6 @@ static struct regulator_init_data camera_regulator_initdata = { static struct fixed_voltage_config camera_regulator_config = { .supply_name = "camera_vdd", .microvolts = 2800000, - .enable_high = 0, .init_data = &camera_regulator_initdata, }; @@ -730,7 +729,7 @@ static struct gpiod_lookup_table camera_supply_gpiod_table = { .dev_id = "reg-fixed-voltage.1", .table = { GPIO_LOOKUP("gpio-pxa", GPIO50_nCAM_EN, - NULL, GPIO_ACTIVE_HIGH), + NULL, GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index e1db072756f2..e13bfc9b01d2 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -883,7 +883,6 @@ static struct regulator_init_data audio_va_initdata = { static struct fixed_voltage_config audio_va_config = { .supply_name = "audio_va", .microvolts = 5000000, - .enable_high = 1, .enabled_at_boot = 0, .init_data = &audio_va_initdata, }; diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index c411f79d4cb5..ebd654302387 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -426,7 +426,7 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = { .dev_id = "reg-fixed-voltage.0", .table = { GPIO_LOOKUP("gpio-pxa", ZEUS_CAN_SHDN_GPIO, - NULL, GPIO_ACTIVE_HIGH), + NULL, GPIO_ACTIVE_LOW), { }, }, }; @@ -547,7 +547,6 @@ static struct regulator_init_data zeus_ohci_regulator_data = { static struct fixed_voltage_config zeus_ohci_regulator_config = { .supply_name = "vbus2", .microvolts = 5000000, /* 5.0V */ - .enable_high = 1, .startup_delay = 0, .init_data = &zeus_ohci_regulator_data, }; diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index dfa42496ec27..d09c3f236186 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -469,7 +469,6 @@ static struct regulator_consumer_supply assabet_cf_vcc_consumers[] = { static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = { .supply_name = "cf-power", .microvolts = 3300000, - .enable_high = 1, }; static struct gpiod_lookup_table assabet_cf_vcc_gpio_table = { diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 22b4106b8084..5495efa07335 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -630,7 +630,6 @@ static struct regulator_init_data cn12_power_init_data = { static struct fixed_voltage_config cn12_power_info = { .supply_name = "CN12 SD/MMC Vdd", .microvolts = 3300000, - .enable_high = 1, .init_data = &cn12_power_init_data, }; @@ -671,7 +670,6 @@ static struct regulator_init_data sdhi0_power_init_data = { static struct fixed_voltage_config sdhi0_power_info = { .supply_name = "CN11 SD/MMC Vdd", .microvolts = 3300000, - .enable_high = 1, .init_data = &sdhi0_power_init_data, }; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c index 96f438d4b026..1421d5330b2c 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c @@ -44,7 +44,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = { */ .microvolts = 2000000, /* 1.8V */ .startup_delay = 250 * 1000, /* 250ms */ - .enable_high = 1, /* active high */ .enabled_at_boot = 0, /* disabled at boot */ .init_data = &bcm43xx_vmmc_data, }; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 430a73dea487..1778c5d1b2d0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -82,7 +82,6 @@ struct regulator_enable_gpio { struct gpio_desc *gpiod; u32 enable_count; /* a number of enabled shared GPIO */ u32 request_count; /* a number of requested shared GPIO */ - unsigned int ena_gpio_invert:1; }; /* @@ -2268,7 +2267,6 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev, } pin->gpiod = gpiod; - pin->ena_gpio_invert = config->ena_gpio_invert; list_add(&pin->list, ®ulator_ena_gpio_list); update_ena_gpio_to_rdev: @@ -2319,8 +2317,7 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable) if (enable) { /* Enable GPIO at initial use */ if (pin->enable_count == 0) - gpiod_set_value_cansleep(pin->gpiod, - !pin->ena_gpio_invert); + gpiod_set_value_cansleep(pin->gpiod, 1); pin->enable_count++; } else { @@ -2331,8 +2328,7 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable) /* Disable GPIO if not used */ if (pin->enable_count <= 1) { - gpiod_set_value_cansleep(pin->gpiod, - pin->ena_gpio_invert); + gpiod_set_value_cansleep(pin->gpiod, 0); pin->enable_count = 0; } } diff --git a/drivers/regulator/da9055-regulator.c b/drivers/regulator/da9055-regulator.c index 588c3d2445cf..417cafe2aba0 100644 --- a/drivers/regulator/da9055-regulator.c +++ b/drivers/regulator/da9055-regulator.c @@ -457,7 +457,6 @@ static int da9055_gpio_init(struct da9055_regulator *regulator, int gpio_mux = pdata->gpio_ren[id]; config->ena_gpiod = pdata->ena_gpiods[id]; - config->ena_gpio_invert = 1; /* * GPI pin is muxed with regulator to control the diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 9abdb9130766..b5afc9db2c61 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -79,15 +79,6 @@ of_get_fixed_voltage_config(struct device *dev, of_property_read_u32(np, "startup-delay-us", &config->startup_delay); - /* - * FIXME: we pulled active low/high and open drain handling into - * gpiolib so it will be handled there. Delete this in the second - * step when we also remove the custom inversion handling for all - * legacy boardfiles. - */ - config->enable_high = 1; - config->gpio_is_open_drain = 0; - if (of_find_property(np, "vin-supply", NULL)) config->input_supply = "vin"; @@ -151,24 +142,14 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) drvdata->desc.fixed_uV = config->microvolts; - cfg.ena_gpio_invert = !config->enable_high; - if (config->enabled_at_boot) { - if (config->enable_high) - gflags = GPIOD_OUT_HIGH; - else - gflags = GPIOD_OUT_LOW; - } else { - if (config->enable_high) - gflags = GPIOD_OUT_LOW; - else - gflags = GPIOD_OUT_HIGH; - } - if (config->gpio_is_open_drain) { - if (gflags == GPIOD_OUT_HIGH) - gflags = GPIOD_OUT_HIGH_OPEN_DRAIN; - else - gflags = GPIOD_OUT_LOW_OPEN_DRAIN; - } + /* + * The signal will be inverted by the GPIO core if flagged so in the + * decriptor. + */ + if (config->enabled_at_boot) + gflags = GPIOD_OUT_HIGH; + else + gflags = GPIOD_OUT_LOW; /* * Some fixed regulators share the enable line between two diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h index 1a4340ed8e2b..f10140da7145 100644 --- a/include/linux/regulator/fixed.h +++ b/include/linux/regulator/fixed.h @@ -25,14 +25,6 @@ struct regulator_init_data; * @input_supply: Name of the input regulator supply * @microvolts: Output voltage of regulator * @startup_delay: Start-up time in microseconds - * @gpio_is_open_drain: Gpio pin is open drain or normal type. - * If it is open drain type then HIGH will be set - * through PULL-UP with setting gpio as input - * and low will be set as gpio-output with driven - * to low. For non-open-drain case, the gpio will - * will be in output and drive to low/high accordingly. - * @enable_high: Polarity of enable GPIO - * 1 = Active high, 0 = Active low * @enabled_at_boot: Whether regulator has been enabled at * boot or not. 1 = Yes, 0 = No * This is used to keep the regulator at @@ -48,8 +40,6 @@ struct fixed_voltage_config { const char *input_supply; int microvolts; unsigned startup_delay; - unsigned gpio_is_open_drain:1; - unsigned enable_high:1; unsigned enabled_at_boot:1; struct regulator_init_data *init_data; }; diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h index 49c407afb944..11cd6375215d 100644 --- a/include/linux/regulator/gpio-regulator.h +++ b/include/linux/regulator/gpio-regulator.h @@ -46,10 +46,6 @@ struct gpio_regulator_state { /** * struct gpio_regulator_config - config structure * @supply_name: Name of the regulator supply - * @enable_gpio: GPIO to use for enable control - * set to -EINVAL if not used - * @enable_high: Polarity of enable GPIO - * 1 = Active high, 0 = Active low * @enabled_at_boot: Whether regulator has been enabled at * boot or not. 1 = Yes, 0 = No * This is used to keep the regulator at @@ -71,8 +67,6 @@ struct gpio_regulator_state { struct gpio_regulator_config { const char *supply_name; - int enable_gpio; - unsigned enable_high:1; unsigned enabled_at_boot:1; unsigned startup_delay; -- cgit v1.2.3 From 541d052d721506549774ab780a2709e4ff8ca79b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 29 Jan 2019 11:31:56 +0100 Subject: regulator: core: Only support passing enable GPIO descriptors Now that we changed all providers to pass descriptors into the core for enable GPIOs instead of a global GPIO number, delete the support for passing GPIO numbers in, and we get a cleanup and size reduction in the core, and from a GPIO point of view we use the modern, cleaner interface. Tested-by: Marek Szyprowski Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/regulator/core.c | 32 ++++++-------------------------- include/linux/regulator/driver.h | 12 +----------- 2 files changed, 7 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 1778c5d1b2d0..4fb475a2e4f2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -2236,35 +2235,19 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev, { struct regulator_enable_gpio *pin; struct gpio_desc *gpiod; - int ret; - if (config->ena_gpiod) - gpiod = config->ena_gpiod; - else - gpiod = gpio_to_desc(config->ena_gpio); + gpiod = config->ena_gpiod; list_for_each_entry(pin, ®ulator_ena_gpio_list, list) { if (pin->gpiod == gpiod) { - rdev_dbg(rdev, "GPIO %d is already used\n", - config->ena_gpio); + rdev_dbg(rdev, "GPIO is already used\n"); goto update_ena_gpio_to_rdev; } } - if (!config->ena_gpiod) { - ret = gpio_request_one(config->ena_gpio, - GPIOF_DIR_OUT | config->ena_gpio_flags, - rdev_get_name(rdev)); - if (ret) - return ret; - } - pin = kzalloc(sizeof(struct regulator_enable_gpio), GFP_KERNEL); - if (pin == NULL) { - if (!config->ena_gpiod) - gpio_free(config->ena_gpio); + if (pin == NULL) return -ENOMEM; - } pin->gpiod = gpiod; list_add(&pin->list, ®ulator_ena_gpio_list); @@ -2287,7 +2270,6 @@ static void regulator_ena_gpio_free(struct regulator_dev *rdev) if (pin->gpiod == rdev->ena_pin->gpiod) { if (pin->request_count <= 1) { pin->request_count = 0; - gpiod_put(pin->gpiod); list_del(&pin->list); kfree(pin); rdev->ena_pin = NULL; @@ -4971,15 +4953,13 @@ regulator_register(const struct regulator_desc *regulator_desc, goto clean; } - if (config->ena_gpiod || - ((config->ena_gpio || config->ena_gpio_initialized) && - gpio_is_valid(config->ena_gpio))) { + if (config->ena_gpiod) { mutex_lock(®ulator_list_mutex); ret = regulator_ena_gpio_request(rdev, config); mutex_unlock(®ulator_list_mutex); if (ret != 0) { - rdev_err(rdev, "Failed to request enable GPIO%d: %d\n", - config->ena_gpio, ret); + rdev_err(rdev, "Failed to request enable GPIO: %d\n", + ret); goto clean; } /* The regulator core took over the GPIO descriptor */ diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 795b38a06b6c..7f8345bff4e1 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -401,13 +401,7 @@ struct regulator_desc { * NULL). * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. - * @ena_gpio_initialized: GPIO controlling regulator enable was properly - * initialized, meaning that >= 0 is a valid gpio - * identifier and < 0 is a non existent gpio. - * @ena_gpio: GPIO controlling regulator enable. - * @ena_gpiod: GPIO descriptor controlling regulator enable. - * @ena_gpio_invert: Sense for GPIO enable control. - * @ena_gpio_flags: Flags to use when calling gpio_request_one() + * @ena_gpiod: GPIO controlling regulator enable. */ struct regulator_config { struct device *dev; @@ -416,11 +410,7 @@ struct regulator_config { struct device_node *of_node; struct regmap *regmap; - bool ena_gpio_initialized; - int ena_gpio; struct gpio_desc *ena_gpiod; - unsigned int ena_gpio_invert:1; - unsigned int ena_gpio_flags; }; /* -- cgit v1.2.3 From d325c402964e7c63db94e9138c530832269a1297 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Fri, 28 Dec 2018 14:38:47 +0100 Subject: ring-buffer: Remove unused function ring_buffer_page_len() Commit 6b7e633fe9c2 ("tracing: Remove extra zeroing out of the ring buffer page") removed the only caller of ring_buffer_page_len(). The function is now unused and may be removed. Link: http://lkml.kernel.org/r/20181228133847.106177-1-mbenes@suse.cz Signed-off-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 2 -- kernel/trace/ring_buffer.c | 14 -------------- 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 5b9ae62272bb..f1429675f252 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -187,8 +187,6 @@ void ring_buffer_set_clock(struct ring_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); -size_t ring_buffer_page_len(void *page); - size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 06e864a334bb..9a91479bbbfe 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -353,20 +353,6 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } -/** - * ring_buffer_page_len - the size of data on the page. - * @page: The page to read - * - * Returns the amount of data on the page, including buffer page header. - */ -size_t ring_buffer_page_len(void *page) -{ - struct buffer_data_page *bpage = page; - - return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) - + BUF_PAGE_HDR_SIZE; -} - /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. -- cgit v1.2.3 From 1878f0dcbff0cd07f62602deb160a44d69a8f146 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Wed, 6 Feb 2019 07:36:40 +0100 Subject: net: phy: provide full set of accessor functions to MMD registers This adds full set of locked and unlocked accessor functions to read and write PHY MMD registers and/or bitfields. Set of functions exactly matches what is already available for PHY legacy registers. Signed-off-by: Nikita Yushchenko Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 116 ++++++++++++++++++++++++++++++++++----- include/linux/phy.h | 134 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 214 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 909b3344babf..7d6aad287f84 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -414,15 +414,15 @@ static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad, } /** - * phy_read_mmd - Convenience function for reading a register + * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. * @phydev: The phy_device struct * @devad: The MMD to read from (0..31) * @regnum: The register on the MMD to read (0..65535) * - * Same rules as for phy_read(); + * Same rules as for __phy_read(); */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) { int val; @@ -434,33 +434,52 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) } else if (phydev->is_c45) { u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - val = mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); + val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); } else { struct mii_bus *bus = phydev->mdio.bus; int phy_addr = phydev->mdio.addr; - mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, phy_addr, devad, regnum); /* Read the content of the MMD's selected register */ val = __mdiobus_read(bus, phy_addr, MII_MMD_DATA); - mutex_unlock(&bus->mdio_lock); } return val; } +EXPORT_SYMBOL(__phy_read_mmd); + +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_read_mmd(phydev, devad, regnum); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} EXPORT_SYMBOL(phy_read_mmd); /** - * phy_write_mmd - Convenience function for writing a register + * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. * @phydev: The phy_device struct * @devad: The MMD to read from * @regnum: The register on the MMD to read * @val: value to write to @regnum * - * Same rules as for phy_write(); + * Same rules as for __phy_write(); */ -int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) { int ret; @@ -472,23 +491,43 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) } else if (phydev->is_c45) { u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - ret = mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, - addr, val); + ret = __mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, + addr, val); } else { struct mii_bus *bus = phydev->mdio.bus; int phy_addr = phydev->mdio.addr; - mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, phy_addr, devad, regnum); /* Write the data into MMD's selected register */ __mdiobus_write(bus, phy_addr, MII_MMD_DATA, val); - mutex_unlock(&bus->mdio_lock); ret = 0; } return ret; } +EXPORT_SYMBOL(__phy_write_mmd); + +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_write_mmd(phydev, devad, regnum, val); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} EXPORT_SYMBOL(phy_write_mmd); /** @@ -538,6 +577,57 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) } EXPORT_SYMBOL_GPL(phy_modify); +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * Unlocked helper function which allows a MMD register to be modified as + * new register value = (old register value & ~mask) | set + */ +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + ret = __phy_read_mmd(phydev, devad, regnum); + if (ret < 0) + return ret; + + ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd); + +/** + * phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd); + static int __phy_read_page(struct phy_device *phydev) { return phydev->drv->read_page(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 70f83d0d7469..237dd035858a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -692,17 +692,6 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_linkmode(struct phy_device *phydev); -/** - * phy_read_mmd - Convenience function for reading a register - * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); - */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); - /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -758,9 +747,60 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) val); } +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * __phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for __phy_read(); + */ +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + +/** + * __phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for __phy_write(); + */ +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); + /** * __phy_set_bits - Convenience function for setting bits in a PHY register * @phydev: the phy_device struct @@ -810,6 +850,66 @@ static inline int phy_clear_bits(struct phy_device *phydev, u32 regnum, u16 val) return phy_modify(phydev, regnum, val, 0); } +/** + * __phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * __phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, val, 0); +} + +/** + * phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + */ +static inline int phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + */ +static inline int phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, val, 0); +} + /** * phy_interrupt_is_valid - Convenience function for testing a given PHY irq * @phydev: the phy_device struct @@ -886,18 +986,6 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) return phydev->is_pseudo_fixed_link; } -/** - * phy_write_mmd - Convenience function for writing a register - * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); - */ -int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); - int phy_save_page(struct phy_device *phydev); int phy_select_page(struct phy_device *phydev, int page); int phy_restore_page(struct phy_device *phydev, int oldpage, int ret); -- cgit v1.2.3 From fd9dc93e36231fb6d520e0edd467058fad4fd12d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 6 Feb 2019 13:07:11 -0500 Subject: XArray: Change xa_insert to return -EBUSY Userspace translates EEXIST to "File exists" which isn't a very good error message for the problem. "Device or resource busy" is a better indication of what went wrong. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 2 +- fs/nilfs2/btnode.c | 2 +- include/linux/xarray.h | 6 +++--- lib/test_xarray.c | 4 ++-- lib/xarray.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 5d54b27c6eba..42bb1a62650f 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -85,7 +85,7 @@ which was at that index; if it returns the same entry which was passed as If you want to only store a new entry to an index if the current entry at that index is ``NULL``, you can use :c:func:`xa_insert` which -returns ``-EEXIST`` if the entry is not empty. +returns ``-EBUSY`` if the entry is not empty. You can enquire whether a mark is set on an entry by using :c:func:`xa_get_mark`. If the entry is not ``NULL``, you can set a mark diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index f2129a5d9f23..4391fd3abd8f 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -189,7 +189,7 @@ retry: */ if (!err) return 0; - else if (err != -EEXIST) + else if (err != -EBUSY) goto failed_unlock; err = invalidate_inode_pages2_range(btnc, newkey, newkey); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index e11841537631..57cf35c4d094 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -664,7 +664,7 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert(struct xarray *xa, unsigned long index, @@ -693,7 +693,7 @@ static inline int xa_insert(struct xarray *xa, unsigned long index, * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert_bh(struct xarray *xa, unsigned long index, @@ -722,7 +722,7 @@ static inline int xa_insert_bh(struct xarray *xa, unsigned long index, * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert_irq(struct xarray *xa, unsigned long index, diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 671a93ee09e6..9d894e93456c 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -346,7 +346,7 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); @@ -388,7 +388,7 @@ static noinline void check_reserve(struct xarray *xa) /* But xa_insert does not */ xa_reserve(xa, 12345678, GFP_KERNEL); XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != - -EEXIST); + -EBUSY); XA_BUG_ON(xa, xa_empty(xa)); XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL); XA_BUG_ON(xa, !xa_empty(xa)); diff --git a/lib/xarray.c b/lib/xarray.c index fb783bf2a441..1b97ca58bd15 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1451,7 +1451,7 @@ EXPORT_SYMBOL(__xa_cmpxchg); * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) @@ -1471,7 +1471,7 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } else { - xas_set_err(&xas, -EEXIST); + xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); -- cgit v1.2.3 From 3ccaf57a6a63ad171a951dcaddffc453b2414c7b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 14:43:22 -0400 Subject: XArray: Add support for 1s-based allocation A lot of places want to allocate IDs starting at 1 instead of 0. While the xa_alloc() API supports this, it's not very efficient if lots of IDs are allocated, due to having to walk down to the bottom of the tree to see if ID 1 is available, then all the way over to the next non-allocated ID. This method marks ID 0 as being occupied which wastes one slot in the XArray, but preserves xa_empty() as working. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 10 +++-- include/linux/xarray.h | 14 ++++++- lib/test_xarray.c | 88 ++++++++++++++++++++++++--------------- lib/xarray.c | 11 +++++ 4 files changed, 86 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 42bb1a62650f..e90c4925cd37 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -131,17 +131,21 @@ If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`, the XArray changes to track whether entries are in use or not. -You can call :c:func:`xa_alloc` to store the entry at any unused index +You can call :c:func:`xa_alloc` to store the entry at an unused index in the XArray. If you need to modify the array from interrupt context, you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable interrupts while allocating the ID. -Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` -will mark the entry as being allocated. Unlike a normal XArray, storing +Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` will +also mark the entry as being allocated. Unlike a normal XArray, storing ``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`. To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if you only want to free the entry if it's ``NULL``). +By default, the lowest free entry is allocated starting from 0. If you +want to allocate entries starting at 1, it is more efficient to use +:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. + You cannot use ``XA_MARK_0`` with an allocating XArray as this mark is used to track whether an entry is free or not. The other marks are available for your use. diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 57cf35c4d094..99dd0838b4ba 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -220,10 +220,13 @@ enum xa_lock_type { #define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) +#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) +/* ALLOC is for a normal 0-based alloc. ALLOC1 is for an 1-based alloc */ #define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) +#define XA_FLAGS_ALLOC1 (XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY) /** * struct xarray - The anchor of the XArray. @@ -279,7 +282,7 @@ struct xarray { #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) /** - * DEFINE_XARRAY_ALLOC() - Define an XArray which can allocate IDs. + * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. @@ -287,6 +290,15 @@ struct xarray { */ #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) +/** + * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of allocating XArrays. + * See also DEFINE_XARRAY(). + */ +#define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1) + void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_erase(struct xarray *, unsigned long index); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 9d894e93456c..cd74f8f32abe 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -589,64 +589,86 @@ static noinline void check_multi_store(struct xarray *xa) #endif } -static DEFINE_XARRAY_ALLOC(xa0); - -static noinline void check_xa_alloc(void) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; - /* An empty array should assign 0 to the first alloc */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + /* An empty array should assign %base to the first alloc */ + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(&xa0, 0); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, base); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Allocating and then erasing a lot should not lose base */ + for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) + xa_alloc_index(xa, i, GFP_KERNEL); + for (i = base; i < 2 * XA_CHUNK_SIZE; i++) + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Destroying the array should do the same as erasing */ + xa_destroy(xa); - /* And it should assign 0 again */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); - /* The next assigned ID should be 1 */ - xa_alloc_index(&xa0, 1, GFP_KERNEL); - xa_erase_index(&xa0, 1); + /* The next assigned ID should be base+1 */ + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ - xa_store_index(&xa0, 1, GFP_KERNEL); - xa_alloc_index(&xa0, 2, GFP_KERNEL); + xa_store_index(xa, base + 1, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); - /* If we then erase 0, it should be free */ - xa_erase_index(&xa0, 0); - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* If we then erase base, it should be free */ + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(&xa0, 1); - xa_erase_index(&xa0, 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(&xa0, i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } - xa_destroy(&xa0); + xa_destroy(xa); + /* Check that we fail properly at the limit of allocation */ id = 0xfffffffeU; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xfffffffeU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xfffffffeU); + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xffffffffU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xffffffffU); + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, id != 0xffffffffU); - xa_destroy(&xa0); + XA_BUG_ON(xa, id != 0xffffffffU); + xa_destroy(xa); id = 10; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), GFP_KERNEL) != -ENOSPC); - xa_erase_index(&xa0, 3); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, 3); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static DEFINE_XARRAY_ALLOC(xa0); +static DEFINE_XARRAY_ALLOC1(xa1); + +static noinline void check_xa_alloc(void) +{ + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, diff --git a/lib/xarray.c b/lib/xarray.c index 1b97ca58bd15..468fb7b7963f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -57,6 +57,11 @@ static inline bool xa_track_free(const struct xarray *xa) return xa->xa_flags & XA_FLAGS_TRACK_FREE; } +static inline bool xa_zero_busy(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_ZERO_BUSY; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -432,6 +437,8 @@ static void xas_shrink(struct xa_state *xas) break; if (!xa_is_node(entry) && node->shift) break; + if (xa_is_zero(entry) && xa_zero_busy(xa)) + entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); @@ -628,6 +635,8 @@ static void *xas_create(struct xa_state *xas, bool allow_root) if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; + if (!entry && xa_zero_busy(xa)) + entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; @@ -1942,6 +1951,8 @@ void xa_destroy(struct xarray *xa) entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); + if (xa_zero_busy(xa)) + xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); -- cgit v1.2.3 From a3e4d3f97ec844de005a679585c04c5c03dfbdb6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 31 Dec 2018 10:41:01 -0500 Subject: XArray: Redesign xa_alloc API It was too easy to forget to initialise the start index. Add an xa_limit data structure which can be used to pass min & max, and define a couple of special values for common cases. Also add some more tests cribbed from the IDR test suite. Change the return value from -ENOSPC to -EBUSY to match xa_insert(). Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 80 +++++++++++++++++++++++++++++----------------- lib/test_xarray.c | 86 ++++++++++++++++++++++++++++++++++++++++---------- lib/xarray.c | 29 ++++++++--------- 3 files changed, 135 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 99dd0838b4ba..883bb958e462 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -200,6 +200,27 @@ static inline int xa_err(void *entry) return 0; } +/** + * struct xa_limit - Represents a range of IDs. + * @min: The lowest ID to allocate (inclusive). + * @max: The maximum ID to allocate (inclusive). + * + * This structure is used either directly or via the XA_LIMIT() macro + * to communicate the range of IDs that are valid for allocation. + * Two common ranges are predefined for you: + * * xa_limit_32b - [0 - UINT_MAX] + * * xa_limit_31b - [0 - INT_MAX] + */ +struct xa_limit { + u32 max; + u32 min; +}; + +#define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max } + +#define xa_limit_32b XA_LIMIT(0, UINT_MAX) +#define xa_limit_31b XA_LIMIT(0, INT_MAX) + typedef unsigned __bitwise xa_mark_t; #define XA_MARK_0 ((__force xa_mark_t)0U) #define XA_MARK_1 ((__force xa_mark_t)1U) @@ -476,7 +497,8 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); -int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t); +int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, + struct xa_limit, gfp_t); int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -753,26 +775,26 @@ static inline int xa_insert_irq(struct xarray *xa, unsigned long index, * xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * - * Context: Process context. Takes and releases the xa_lock. May sleep if + * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); return err; @@ -782,26 +804,26 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, * xa_alloc_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_bh(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); return err; @@ -811,26 +833,26 @@ static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry, * xa_alloc_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). * @entry: New entry. + * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry, - gfp_t gfp) +static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) { int err; xa_lock_irq(xa); - err = __xa_alloc(xa, id, max, entry, gfp); + err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); return err; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index cd74f8f32abe..b5a6b981454d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -40,9 +40,9 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - u32 id = 0; + u32 id; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(index), xa_limit_32b, gfp) != 0); XA_BUG_ON(xa, id != index); } @@ -640,28 +640,81 @@ static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) xa_destroy(xa); /* Check that we fail properly at the limit of allocation */ - id = 0xfffffffeU; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX - 1), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0xfffffffeU); - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0xffffffffU); - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(xa, id != 0xffffffffU); + id = 3; + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(0), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, id != 3); xa_destroy(xa); - id = 10; - XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); - XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); xa_erase_index(xa, 3); XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) +{ + unsigned int i, id; + unsigned long index; + void *entry; + + /* Allocate and free a NULL and check xa_empty() behaves */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, id) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Ditto, but check destroy instead of erase */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = base; i < base + 10; i++) { + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, + GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != i); + } + + XA_BUG_ON(xa, xa_store(xa, 3, xa_mk_index(3), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, xa_mk_index(4), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, NULL, GFP_KERNEL) != xa_mk_index(4)); + XA_BUG_ON(xa, xa_erase(xa, 5) != NULL); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 5); + + xa_for_each(xa, index, entry) { + xa_erase_index(xa, index); + } + + for (i = base; i < base + 9; i++) { + XA_BUG_ON(xa, xa_erase(xa, i) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + } + XA_BUG_ON(xa, xa_erase(xa, 8) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, base + 9) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_destroy(xa); +} + static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); @@ -669,6 +722,8 @@ static noinline void check_xa_alloc(void) { check_xa_alloc_1(&xa0, 0); check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, @@ -1219,9 +1274,8 @@ static void check_align_1(struct xarray *xa, char *name) void *entry; for (i = 0; i < 8; i++) { - id = 0; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL) - != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, name + i, xa_limit_32b, + GFP_KERNEL) != 0); XA_BUG_ON(xa, id != i); } xa_for_each(xa, index, entry) diff --git a/lib/xarray.c b/lib/xarray.c index 468fb7b7963f..c707388fb05e 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1615,23 +1615,23 @@ EXPORT_SYMBOL(xa_store_range); * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). + * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +int __xa_alloc(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); - int err; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -1642,18 +1642,17 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) entry = XA_ZERO_ENTRY; do { - xas.xa_index = *id; - xas_find_marked(&xas, max, XA_FREE_MARK); + xas.xa_index = limit.min; + xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) - xas_set_err(&xas, -ENOSPC); + xas_set_err(&xas, -EBUSY); + else + *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); - err = xas_error(&xas); - if (!err) - *id = xas.xa_index; - return err; + return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); -- cgit v1.2.3 From 2fa044e51a1f35d7b04cbde07ec513b0ba195e38 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 6 Nov 2018 14:13:35 -0500 Subject: XArray: Add cyclic allocation This differs slightly from the IDR equivalent in five ways. 1. It can allocate up to UINT_MAX instead of being limited to INT_MAX, like xa_alloc(). Also like xa_alloc(), it will write to the 'id' pointer before placing the entry in the XArray. 2. The 'next' cursor is allocated separately from the XArray instead of being part of the IDR. This saves memory for all the users which do not use the cyclic allocation API and suits some users better. 3. It returns -EBUSY instead of -ENOSPC. 4. It will attempt to wrap back to the minimum value on memory allocation failure as well as on an -EBUSY error, assuming that a user would rather allocate a small ID than suffer an ID allocation failure. 5. It reports whether it has wrapped, which is important to some users. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 4 +- include/linux/xarray.h | 102 ++++++++++++++++++++++++++++++++++++++ lib/test_xarray.c | 53 ++++++++++++++++++++ lib/xarray.c | 50 +++++++++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index e90c4925cd37..c7436da5c4ad 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -144,7 +144,9 @@ you only want to free the entry if it's ``NULL``). By default, the lowest free entry is allocated starting from 0. If you want to allocate entries starting at 1, it is more efficient to use -:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. +:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``. If you want to +allocate IDs up to a maximum, then wrap back around to the lowest free +ID, you can use :c:func:`xa_alloc_cyclic`. You cannot use ``XA_MARK_0`` with an allocating XArray as this mark is used to track whether an entry is free or not. The other marks are diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 883bb958e462..5ed6b462e754 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -242,6 +242,7 @@ enum xa_lock_type { #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) +#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) @@ -499,6 +500,8 @@ void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, struct xa_limit, gfp_t); +int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry, + struct xa_limit, u32 *next, gfp_t); int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -858,6 +861,105 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, return err; } +/** + * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_irq(xa); + + return err; +} + /** * xa_reserve() - Reserve this index in the XArray. * @xa: XArray. diff --git a/lib/test_xarray.c b/lib/test_xarray.c index b5a6b981454d..eaf53f742c72 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -715,6 +715,57 @@ static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) xa_destroy(xa); } +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) +{ + struct xa_limit limit = XA_LIMIT(1, 0x3fff); + u32 next = 0; + unsigned int i, id; + unsigned long index; + void *entry; + + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(1), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 1); + + next = 0x3ffd; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 0x3ffd); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0x3ffe; i < 0x4003; i++) { + if (i < 0x4000) + entry = xa_mk_index(i); + else + entry = xa_mk_index(i - 0x3fff); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, entry, limit, + &next, GFP_KERNEL) != (id == 1)); + XA_BUG_ON(xa, xa_mk_index(id) != entry); + } + + /* Check wrap-around is handled correctly */ + if (base != 0) + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); + next = UINT_MAX; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != UINT_MAX); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base), + xa_limit_32b, &next, GFP_KERNEL) != 1); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base + 1), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base + 1); + + xa_for_each(xa, index, entry) + xa_erase_index(xa, index); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); @@ -724,6 +775,8 @@ static noinline void check_xa_alloc(void) check_xa_alloc_1(&xa1, 1); check_xa_alloc_2(&xa0, 0); check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, diff --git a/lib/xarray.c b/lib/xarray.c index c707388fb05e..89e37ac50850 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1656,6 +1656,56 @@ int __xa_alloc(struct xarray *xa, u32 *id, void *entry, } EXPORT_SYMBOL(__xa_alloc); +/** + * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + u32 min = limit.min; + int ret; + + limit.min = max(min, *next); + ret = __xa_alloc(xa, id, entry, limit, gfp); + if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { + xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + + if (ret < 0 && limit.min > min) { + limit.min = min; + ret = __xa_alloc(xa, id, entry, limit, gfp); + if (ret == 0) + ret = 1; + } + + if (ret >= 0) { + *next = *id + 1; + if (*next == 0) + xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; + } + return ret; +} +EXPORT_SYMBOL(__xa_alloc_cyclic); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3 From 60b8f0ddf1a927ef02141a6610fd52575134f821 Mon Sep 17 00:00:00 2001 From: Phil Edworthy Date: Mon, 3 Dec 2018 11:13:09 +0000 Subject: clk: Add (devm_)clk_get_optional() functions This adds clk_get_optional() and devm_clk_get_optional() functions to get optional clocks. They behave the same as (devm_)clk_get() except where there is no clock producer. In this case, instead of returning -ENOENT, the function returns NULL. This makes error checking simpler and allows clk_prepare_enable, etc to be called on the returned reference without additional checks. Signed-off-by: Phil Edworthy Reviewed-by: Andy Shevchenko Cc: Russell King [sboyd@kernel.org: Document in devres.txt] Signed-off-by: Stephen Boyd --- Documentation/driver-model/devres.txt | 1 + drivers/clk/clk-devres.c | 11 +++++++++++ include/linux/clk.h | 36 +++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index b277cafce71e..83f38c0439cd 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -242,6 +242,7 @@ certainly invest a bit more effort into libata core layer). CLOCK devm_clk_get() + devm_clk_get_optional() devm_clk_put() devm_clk_hw_register() devm_of_clk_add_hw_provider() diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index c9a86156ced8..daa1fc8fba53 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -29,6 +29,17 @@ struct clk *devm_clk_get(struct device *dev, const char *id) } EXPORT_SYMBOL(devm_clk_get); +struct clk *devm_clk_get_optional(struct device *dev, const char *id) +{ + struct clk *clk = devm_clk_get(dev, id); + + if (clk == ERR_PTR(-ENOENT)) + return NULL; + + return clk; +} +EXPORT_SYMBOL(devm_clk_get_optional); + struct clk_bulk_devres { struct clk_bulk_data *clks; int num_clks; diff --git a/include/linux/clk.h b/include/linux/clk.h index a7773b5c0b9f..d8bc1a856b39 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -383,6 +383,17 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, */ struct clk *devm_clk_get(struct device *dev, const char *id); +/** + * devm_clk_get_optional - lookup and obtain a managed reference to an optional + * clock producer. + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Behaves the same as devm_clk_get() except where there is no clock producer. + * In this case, instead of returning -ENOENT, the function returns NULL. + */ +struct clk *devm_clk_get_optional(struct device *dev, const char *id); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. @@ -718,6 +729,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } +static inline struct clk *devm_clk_get_optional(struct device *dev, + const char *id) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { @@ -862,6 +879,25 @@ static inline void clk_bulk_disable_unprepare(int num_clks, clk_bulk_unprepare(num_clks, clks); } +/** + * clk_get_optional - lookup and obtain a reference to an optional clock + * producer. + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Behaves the same as clk_get() except where there is no clock producer. In + * this case, instead of returning -ENOENT, the function returns NULL. + */ +static inline struct clk *clk_get_optional(struct device *dev, const char *id) +{ + struct clk *clk = clk_get(dev, id); + + if (clk == ERR_PTR(-ENOENT)) + return NULL; + + return clk; +} + #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) struct clk *of_clk_get(struct device_node *np, int index); struct clk *of_clk_get_by_name(struct device_node *np, const char *name); -- cgit v1.2.3 From 3eee6c7d119cd8563ad25898f94d6c1b514da548 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 7 Dec 2018 13:09:39 +0200 Subject: clkdev: add managed clkdev lookup registration Clkdev registration lacks of managed registration functions and it seems few drivers do not drop clkdev lookups at exit. Add devm_clk_hw_register_clkdev and devm_clk_release_clkdev to ease lookup releasing at exit. Signed-off-by: Matti Vaittinen Signed-off-by: Stephen Boyd --- Documentation/driver-model/devres.txt | 1 + drivers/clk/clkdev.c | 111 +++++++++++++++++++++++++++------- include/linux/clkdev.h | 4 ++ 3 files changed, 93 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index b277cafce71e..805b7bf5d98f 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -245,6 +245,7 @@ CLOCK devm_clk_put() devm_clk_hw_register() devm_of_clk_add_hw_provider() + devm_clk_hw_register_clkdev() DMA dmaenginem_async_device_register() diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 9ab3db8b3988..4621f8a91fc0 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -401,6 +401,23 @@ static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw, return cl; } +static int do_clk_register_clkdev(struct clk_hw *hw, + struct clk_lookup **cl, const char *con_id, const char *dev_id) +{ + if (IS_ERR(hw)) + return PTR_ERR(hw); + /* + * Since dev_id can be NULL, and NULL is handled specially, we must + * pass it as either a NULL format string, or with "%s". + */ + if (dev_id) + *cl = __clk_register_clkdev(hw, con_id, "%s", dev_id); + else + *cl = __clk_register_clkdev(hw, con_id, NULL); + + return *cl ? 0 : -ENOMEM; +} + /** * clk_register_clkdev - register one clock lookup for a struct clk * @clk: struct clk to associate with all clk_lookups @@ -423,17 +440,8 @@ int clk_register_clkdev(struct clk *clk, const char *con_id, if (IS_ERR(clk)) return PTR_ERR(clk); - /* - * Since dev_id can be NULL, and NULL is handled specially, we must - * pass it as either a NULL format string, or with "%s". - */ - if (dev_id) - cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, "%s", - dev_id); - else - cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, NULL); - - return cl ? 0 : -ENOMEM; + return do_clk_register_clkdev(__clk_get_hw(clk), &cl, con_id, + dev_id); } EXPORT_SYMBOL(clk_register_clkdev); @@ -456,18 +464,75 @@ int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id, { struct clk_lookup *cl; - if (IS_ERR(hw)) - return PTR_ERR(hw); + return do_clk_register_clkdev(hw, &cl, con_id, dev_id); +} +EXPORT_SYMBOL(clk_hw_register_clkdev); - /* - * Since dev_id can be NULL, and NULL is handled specially, we must - * pass it as either a NULL format string, or with "%s". - */ - if (dev_id) - cl = __clk_register_clkdev(hw, con_id, "%s", dev_id); - else - cl = __clk_register_clkdev(hw, con_id, NULL); +static void devm_clkdev_release(struct device *dev, void *res) +{ + clkdev_drop(*(struct clk_lookup **)res); +} - return cl ? 0 : -ENOMEM; +static int devm_clk_match_clkdev(struct device *dev, void *res, void *data) +{ + struct clk_lookup **l = res; + + return *l == data; } -EXPORT_SYMBOL(clk_hw_register_clkdev); + +/** + * devm_clk_release_clkdev - Resource managed clkdev lookup release + * @dev: device this lookup is bound + * @con_id: connection ID string on device + * @dev_id: format string describing device name + * + * Drop the clkdev lookup created with devm_clk_hw_register_clkdev. + * Normally this function will not need to be called and the resource + * management code will ensure that the resource is freed. + */ +void devm_clk_release_clkdev(struct device *dev, const char *con_id, + const char *dev_id) +{ + struct clk_lookup *cl; + int rval; + + cl = clk_find(dev_id, con_id); + WARN_ON(!cl); + rval = devres_release(dev, devm_clkdev_release, + devm_clk_match_clkdev, cl); + WARN_ON(rval); +} +EXPORT_SYMBOL(devm_clk_release_clkdev); + +/** + * devm_clk_hw_register_clkdev - managed clk lookup registration for clk_hw + * @dev: device this lookup is bound + * @hw: struct clk_hw to associate with all clk_lookups + * @con_id: connection ID string on device + * @dev_id: format string describing device name + * + * con_id or dev_id may be NULL as a wildcard, just as in the rest of + * clkdev. + * + * To make things easier for mass registration, we detect error clk_hws + * from a previous clk_hw_register_*() call, and return the error code for + * those. This is to permit this function to be called immediately + * after clk_hw_register_*(). + */ +int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, + const char *con_id, const char *dev_id) +{ + int rval = -ENOMEM; + struct clk_lookup **cl; + + cl = devres_alloc(devm_clkdev_release, sizeof(*cl), GFP_KERNEL); + if (cl) { + rval = do_clk_register_clkdev(hw, cl, con_id, dev_id); + if (!rval) + devres_add(dev, cl); + else + devres_free(cl); + } + return rval; +} +EXPORT_SYMBOL(devm_clk_hw_register_clkdev); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 4890ff033220..ccb32af5848b 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -52,4 +52,8 @@ int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); +int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, + const char *con_id, const char *dev_id); +void devm_clk_release_clkdev(struct device *dev, const char *con_id, + const char *dev_id); #endif -- cgit v1.2.3 From eca4205f9ec3bea2d5aad0493c19f5d2675a20fc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:51 +0100 Subject: ethtool: add ethtool_rx_flow_spec to flow_rule structure translator This patch adds a function to translate the ethtool_rx_flow_spec structure to the flow_rule representation. This allows us to reuse code from the driver side given that both flower and ethtool_rx_flow interfaces use the same representation. This patch also includes support for the flow type flags FLOW_EXT, FLOW_MAC_EXT and FLOW_RSS. The ethtool_rx_flow_spec_input wrapper structure is used to convey the rss_context field, that is away from the ethtool_rx_flow_spec structure, and the ethtool_rx_flow_spec structure. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/ethtool.h | 15 +++ net/core/ethtool.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index afd9596ce636..19a8de5326fb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -400,4 +400,19 @@ struct ethtool_ops { void (*get_ethtool_phy_stats)(struct net_device *, struct ethtool_stats *, u64 *); }; + +struct ethtool_rx_flow_rule { + struct flow_rule *rule; + unsigned long priv[0]; +}; + +struct ethtool_rx_flow_spec_input { + const struct ethtool_rx_flow_spec *fs; + u32 rss_ctx; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); + #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 45c0a6e3d6ad..0fbf39239b29 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -2820,3 +2821,243 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return rc; } + +struct ethtool_rx_flow_key { + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_eth_addrs eth_addrs; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct ethtool_rx_flow_match { + struct flow_dissector dissector; + struct ethtool_rx_flow_key key; + struct ethtool_rx_flow_key mask; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) +{ + const struct ethtool_rx_flow_spec *fs = input->fs; + static struct in6_addr zero_addr = {}; + struct ethtool_rx_flow_match *match; + struct ethtool_rx_flow_rule *flow; + struct flow_action_entry *act; + + flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + + sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + /* ethtool_rx supports only one single action per rule. */ + flow->rule = flow_rule_alloc(1); + if (!flow->rule) { + kfree(flow); + return ERR_PTR(-ENOMEM); + } + + match = (struct ethtool_rx_flow_match *)flow->priv; + flow->rule->match.dissector = &match->dissector; + flow->rule->match.mask = &match->mask; + flow->rule->match.key = &match->key; + + match->mask.basic.n_proto = htons(0xffff); + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: { + const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IP); + + v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; + + if (v4_m_spec->ip4src) { + match->key.ipv4.src = v4_spec->ip4src; + match->mask.ipv4.src = v4_m_spec->ip4src; + } + if (v4_m_spec->ip4dst) { + match->key.ipv4.dst = v4_spec->ip4dst; + match->mask.ipv4.dst = v4_m_spec->ip4dst; + } + if (v4_m_spec->ip4src || + v4_m_spec->ip4dst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv4); + } + if (v4_m_spec->psrc) { + match->key.tp.src = v4_spec->psrc; + match->mask.tp.src = v4_m_spec->psrc; + } + if (v4_m_spec->pdst) { + match->key.tp.dst = v4_spec->pdst; + match->mask.tp.dst = v4_m_spec->pdst; + } + if (v4_m_spec->psrc || + v4_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v4_m_spec->tos) { + match->key.ip.tos = v4_spec->tos; + match->mask.ip.tos = v4_m_spec->tos; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: { + const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IPV6); + + v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.src, v6_spec->ip6src, + sizeof(match->key.ipv6.src)); + memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, + sizeof(match->mask.ipv6.src)); + } + if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, + sizeof(match->key.ipv6.dst)); + memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, + sizeof(match->mask.ipv6.dst)); + } + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || + memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv6); + } + if (v6_m_spec->psrc) { + match->key.tp.src = v6_spec->psrc; + match->mask.tp.src = v6_m_spec->psrc; + } + if (v6_m_spec->pdst) { + match->key.tp.dst = v6_spec->pdst; + match->mask.tp.dst = v6_m_spec->pdst; + } + if (v6_m_spec->psrc || + v6_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v6_m_spec->tclass) { + match->key.ip.tos = v6_spec->tclass; + match->mask.ip.tos = v6_m_spec->tclass; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + default: + ethtool_rx_flow_rule_destroy(flow); + return ERR_PTR(-EINVAL); + } + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_TCP; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_UDP; + break; + } + match->mask.basic.ip_proto = 0xff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); + match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = + offsetof(struct ethtool_rx_flow_key, basic); + + if (fs->flow_type & FLOW_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->vlan_etype && + ext_m_spec->vlan_tci) { + match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; + match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; + + match->key.vlan.vlan_id = + ntohs(ext_h_spec->vlan_tci) & 0x0fff; + match->mask.vlan.vlan_id = + ntohs(ext_m_spec->vlan_tci) & 0x0fff; + + match->key.vlan.vlan_priority = + (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; + match->mask.vlan.vlan_priority = + (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_VLAN); + match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = + offsetof(struct ethtool_rx_flow_key, vlan); + } + } + if (fs->flow_type & FLOW_MAC_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->h_dest) { + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); + } + } + + act = &flow->rule->action.entries[0]; + switch (fs->ring_cookie) { + case RX_CLS_FLOW_DISC: + act->id = FLOW_ACTION_DROP; + break; + case RX_CLS_FLOW_WAKE: + act->id = FLOW_ACTION_WAKE; + break; + default: + act->id = FLOW_ACTION_QUEUE; + if (fs->flow_type & FLOW_RSS) + act->queue.ctx = input->rss_ctx; + + act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); + break; + } + + return flow; +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_create); + +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) +{ + kfree(flow->rule); + kfree(flow); +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); -- cgit v1.2.3 From d6abc5969463359c366d459247b90366fcd6f5c5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:35 -0800 Subject: net: Introduce ndo_get_port_parent_id() In preparation for getting rid of switchdev_ops, create a dedicated NDO operation for getting the port's parent identifier. There are essentially two classes of drivers that need to implement getting the port's parent ID which are VF/PF drivers with a built-in switch, and pure switchdev drivers such as mlxsw, ocelot, dsa etc. We introduce a helper function: dev_get_port_parent_id() which supports recursion into the lower devices to obtain the first port's parent ID. Convert the bridge, core and ipv4 multicast routing code to check for such ndo_get_port_parent_id() and call the helper function when valid before falling back to switchdev_port_attr_get(). This will allow us to convert all relevant drivers in one go instead of having to implement both switchdev_port_attr_get() and ndo_get_port_parent_id() operations, then get rid of switchdev_port_attr_get(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++ net/bridge/br_switchdev.c | 9 ++++++-- net/core/dev.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ net/core/net-sysfs.c | 7 +++++- net/core/rtnetlink.c | 6 ++++- net/ipv4/ipmr.c | 8 ++++++- 6 files changed, 91 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba57d0ba425e..1d95e634f3fe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1188,6 +1188,10 @@ struct dev_ifalias { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * + * int (*ndo_get_port_parent_id)(struct net_device *dev, + * struct netdev_phys_item_id *ppid) + * Called to get the parent ID of the physical port of this device. + * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1412,6 +1416,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_port_parent_id)(struct net_device *dev, + struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void (*ndo_udp_tunnel_add)(struct net_device *dev, @@ -3651,6 +3657,9 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4d2b9eb7604a..06b0ae44585f 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -14,7 +14,8 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { - if (switchdev_port_same_parent_id(dev, p->dev)) + if (netdev_port_same_parent_id(dev, p->dev) || + switchdev_port_same_parent_id(dev, p->dev)) return p->offload_fwd_mark; } @@ -23,6 +24,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) int nbp_switchdev_mark_set(struct net_bridge_port *p) { + const struct net_device_ops *ops = p->dev->netdev_ops; struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -31,7 +33,10 @@ int nbp_switchdev_mark_set(struct net_bridge_port *p) ASSERT_RTNL(); - err = switchdev_port_attr_get(p->dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true); + else + err = switchdev_port_attr_get(p->dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index bfa4be42afff..8c6d5cf8a308 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7877,6 +7877,63 @@ int dev_get_phys_port_name(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_name); +/** + * dev_get_port_parent_id - Get the device's port parent identifier + * @dev: network device + * @ppid: pointer to a storage for the port's parent identifier + * @recurse: allow/disallow recursion to lower devices + * + * Get the devices's port parent identifier + */ +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, + bool recurse) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct netdev_phys_item_id first = { }; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops->ndo_get_port_parent_id) + return ops->ndo_get_port_parent_id(dev, ppid); + + if (!recurse) + return err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = dev_get_port_parent_id(lower_dev, ppid, recurse); + if (err) + break; + if (!first.id_len) + first = *ppid; + else if (memcmp(&first, ppid, sizeof(*ppid))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL(dev_get_port_parent_id); + +/** + * netdev_port_same_parent_id - Indicate if two network devices have + * the same port parent identifier + * @a: first network device + * @b: second network device + */ +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) +{ + struct netdev_phys_item_id a_id = { }; + struct netdev_phys_item_id b_id = { }; + + if (dev_get_port_parent_id(a, &a_id, true) || + dev_get_port_parent_id(b, &b_id, true)) + return false; + + return netdev_phys_item_id_same(&a_id, &b_id); +} +EXPORT_SYMBOL(netdev_port_same_parent_id); + /** * dev_change_proto_down - update protocol port state information * @dev: device diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index ff9fd2bb4ce4..4eace9f1dcf9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -495,6 +495,7 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + const struct net_device_ops *ops = netdev->netdev_ops; ssize_t ret = -EINVAL; if (!rtnl_trylock()) @@ -507,7 +508,11 @@ static ssize_t phys_switch_id_show(struct device *dev, .flags = SWITCHDEV_F_NO_RECURSE, }; - ret = switchdev_port_attr_get(netdev, &attr); + if (ops->ndo_get_port_parent_id) + ret = dev_get_port_parent_id(netdev, &attr.u.ppid, + false); + else + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, attr.u.ppid.id); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5a98082ac7a..90dd02c1f561 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1146,6 +1146,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct switchdev_attr attr = { .orig_dev = dev, @@ -1153,7 +1154,10 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) .flags = SWITCHDEV_F_NO_RECURSE, }; - err = switchdev_port_attr_get(dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(dev, &attr.u.ppid, false); + else + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fb99002c3d4e..c71bcc42d66d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -837,6 +837,7 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { + const struct net_device_ops *ops; int vifi = vifc->vifc_vifi; struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -920,7 +921,12 @@ static int vif_add(struct net *net, struct mr_table *mrt, (VIFF_TUNNEL | VIFF_REGISTER)); attr.orig_dev = dev; - if (!switchdev_port_attr_get(dev, &attr)) { + ops = dev->netdev_ops; + if (ops->ndo_get_port_parent_id && + !dev_get_port_parent_id(dev, &attr.u.ppid, true)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else if (!switchdev_port_attr_get(dev, &attr)) { memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); v->dev_parent_id.id_len = attr.u.ppid.id_len; } else { -- cgit v1.2.3 From 4d5f007eedb74d71a7bde2bff69b6a31ad8ab427 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 2 Jan 2019 13:28:47 +0100 Subject: time: make adjtime compat handling available for 32 bit We want to reuse the compat_timex handling on 32-bit architectures the same way we are using the compat handling for timespec when moving to 64-bit time_t. Move all definitions related to compat_timex out of the compat code into the normal timekeeping code, along with a rename to old_timex32, corresponding to the timespec/timeval structures, and make it controlled by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select. Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 35 ++--------------------- include/linux/time32.h | 32 ++++++++++++++++++++- kernel/compat.c | 64 ------------------------------------------ kernel/time/posix-timers.c | 14 ++-------- kernel/time/time.c | 70 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 102 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 056be0d03722..657ca6abd855 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -132,37 +132,6 @@ struct compat_tms { compat_clock_t tms_cstime; }; -struct compat_timex { - compat_uint_t modes; - compat_long_t offset; - compat_long_t freq; - compat_long_t maxerror; - compat_long_t esterror; - compat_int_t status; - compat_long_t constant; - compat_long_t precision; - compat_long_t tolerance; - struct old_timeval32 time; - compat_long_t tick; - compat_long_t ppsfreq; - compat_long_t jitter; - compat_int_t shift; - compat_long_t stabil; - compat_long_t jitcnt; - compat_long_t calcnt; - compat_long_t errcnt; - compat_long_t stbcnt; - compat_int_t tai; - - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; - compat_int_t:32; compat_int_t:32; compat_int_t:32; -}; - -struct timex; -int compat_get_timex(struct timex *, const struct compat_timex __user *); -int compat_put_timex(struct compat_timex __user *, const struct timex *); - #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { @@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); +asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct compat_timex __user *tp); + struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, diff --git a/include/linux/time32.h b/include/linux/time32.h index 118b9977080c..820a22e2b98b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -10,6 +10,7 @@ */ #include +#include #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) @@ -35,13 +36,42 @@ struct old_utimbuf32 { old_time32_t modtime; }; +struct old_timex32 { + u32 modes; + s32 offset; + s32 freq; + s32 maxerror; + s32 esterror; + s32 status; + s32 constant; + s32 precision; + s32 tolerance; + struct old_timeval32 time; + s32 tick; + s32 ppsfreq; + s32 jitter; + s32 shift; + s32 stabil; + s32 jitcnt; + s32 calcnt; + s32 errcnt; + s32 stbcnt; + s32 tai; + + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; s32:32; + s32:32; s32:32; s32:32; +}; + extern int get_old_timespec32(struct timespec64 *, const void __user *); extern int put_old_timespec32(const struct timespec64 *, void __user *); extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); - +struct timex; +int get_old_timex32(struct timex *, const struct old_timex32 __user *); +int put_old_timex32(struct old_timex32 __user *, const struct timex *); #if __BITS_PER_LONG == 64 diff --git a/kernel/compat.c b/kernel/compat.c index f01affa17e22..d8a36c6ad7c9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -30,69 +29,6 @@ #include -int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp) -{ - struct compat_timex tx32; - - memset(txc, 0, sizeof(struct timex)); - if (copy_from_user(&tx32, utp, sizeof(struct compat_timex))) - return -EFAULT; - - txc->modes = tx32.modes; - txc->offset = tx32.offset; - txc->freq = tx32.freq; - txc->maxerror = tx32.maxerror; - txc->esterror = tx32.esterror; - txc->status = tx32.status; - txc->constant = tx32.constant; - txc->precision = tx32.precision; - txc->tolerance = tx32.tolerance; - txc->time.tv_sec = tx32.time.tv_sec; - txc->time.tv_usec = tx32.time.tv_usec; - txc->tick = tx32.tick; - txc->ppsfreq = tx32.ppsfreq; - txc->jitter = tx32.jitter; - txc->shift = tx32.shift; - txc->stabil = tx32.stabil; - txc->jitcnt = tx32.jitcnt; - txc->calcnt = tx32.calcnt; - txc->errcnt = tx32.errcnt; - txc->stbcnt = tx32.stbcnt; - - return 0; -} - -int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc) -{ - struct compat_timex tx32; - - memset(&tx32, 0, sizeof(struct compat_timex)); - tx32.modes = txc->modes; - tx32.offset = txc->offset; - tx32.freq = txc->freq; - tx32.maxerror = txc->maxerror; - tx32.esterror = txc->esterror; - tx32.status = txc->status; - tx32.constant = txc->constant; - tx32.precision = txc->precision; - tx32.tolerance = txc->tolerance; - tx32.time.tv_sec = txc->time.tv_sec; - tx32.time.tv_usec = txc->time.tv_usec; - tx32.tick = txc->tick; - tx32.ppsfreq = txc->ppsfreq; - tx32.jitter = txc->jitter; - tx32.shift = txc->shift; - tx32.stabil = txc->stabil; - tx32.jitcnt = txc->jitcnt; - tx32.calcnt = txc->calcnt; - tx32.errcnt = txc->errcnt; - tx32.stbcnt = txc->stbcnt; - tx32.tai = txc->tai; - if (copy_to_user(utp, &tx32, sizeof(struct compat_timex))) - return -EFAULT; - return 0; -} - static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv) { return (!access_ok(ctv, sizeof(*ctv)) || diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 0e84bb72a3da..8955f32f2a36 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1123,12 +1123,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return err; } -#endif - -#ifdef CONFIG_COMPAT - COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, - struct compat_timex __user *, utp) + struct old_timex32 __user *, utp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timex ktx; @@ -1139,22 +1135,18 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, if (!kc->clock_adj) return -EOPNOTSUPP; - err = compat_get_timex(&ktx, utp); + err = get_old_timex32(&ktx, utp); if (err) return err; err = kc->clock_adj(which_clock, &ktx); if (err >= 0) - err = compat_put_timex(utp, &ktx); + err = put_old_timex32(utp, &ktx); return err; } -#endif - -#ifdef CONFIG_COMPAT_32BIT_TIME - COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, struct old_timespec32 __user *, tp) { diff --git a/kernel/time/time.c b/kernel/time/time.c index 2edb5088a70b..2d013bc2b271 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -278,20 +278,82 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME +int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) +{ + struct old_timex32 tx32; + + memset(txc, 0, sizeof(struct timex)); + if (copy_from_user(&tx32, utp, sizeof(struct old_timex32))) + return -EFAULT; + + txc->modes = tx32.modes; + txc->offset = tx32.offset; + txc->freq = tx32.freq; + txc->maxerror = tx32.maxerror; + txc->esterror = tx32.esterror; + txc->status = tx32.status; + txc->constant = tx32.constant; + txc->precision = tx32.precision; + txc->tolerance = tx32.tolerance; + txc->time.tv_sec = tx32.time.tv_sec; + txc->time.tv_usec = tx32.time.tv_usec; + txc->tick = tx32.tick; + txc->ppsfreq = tx32.ppsfreq; + txc->jitter = tx32.jitter; + txc->shift = tx32.shift; + txc->stabil = tx32.stabil; + txc->jitcnt = tx32.jitcnt; + txc->calcnt = tx32.calcnt; + txc->errcnt = tx32.errcnt; + txc->stbcnt = tx32.stbcnt; + + return 0; +} + +int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) +{ + struct old_timex32 tx32; + + memset(&tx32, 0, sizeof(struct old_timex32)); + tx32.modes = txc->modes; + tx32.offset = txc->offset; + tx32.freq = txc->freq; + tx32.maxerror = txc->maxerror; + tx32.esterror = txc->esterror; + tx32.status = txc->status; + tx32.constant = txc->constant; + tx32.precision = txc->precision; + tx32.tolerance = txc->tolerance; + tx32.time.tv_sec = txc->time.tv_sec; + tx32.time.tv_usec = txc->time.tv_usec; + tx32.tick = txc->tick; + tx32.ppsfreq = txc->ppsfreq; + tx32.jitter = txc->jitter; + tx32.shift = txc->shift; + tx32.stabil = txc->stabil; + tx32.jitcnt = txc->jitcnt; + tx32.calcnt = txc->calcnt; + tx32.errcnt = txc->errcnt; + tx32.stbcnt = txc->stbcnt; + tx32.tai = txc->tai; + if (copy_to_user(utp, &tx32, sizeof(struct old_timex32))) + return -EFAULT; + return 0; +} -COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp) +COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) { struct timex txc; int err, ret; - err = compat_get_timex(&txc, utp); + err = get_old_timex32(&txc, utp); if (err) return err; ret = do_adjtimex(&txc); - err = compat_put_timex(utp, &txc); + err = put_old_timex32(utp, &txc); if (err) return err; -- cgit v1.2.3 From 2c620ff93d9fbd5d644760d4c21d389078ec1080 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:20 -0700 Subject: time: Add struct __kernel_timex struct timex uses struct timeval internally. struct timeval is not y2038 safe. Introduce a new UAPI type struct __kernel_timex that is y2038 safe. struct __kernel_timex uses a timeval type that is similar to struct __kernel_timespec which preserves the same structure size across 32 bit and 64 bit ABIs. struct __kernel_timex also restructures other members of the structure to make the structure the same on 64 bit and 32 bit architectures. Note that struct __kernel_timex is the same as struct timex on a 64 bit architecture. The above solution is similar to other new y2038 syscalls that are being introduced: both 32 bit and 64 bit ABIs have a common entry, and the compat entry supports the old 32 bit syscall interface. Alternatives considered were: 1. Add new time type to struct timex that makes use of padded bits. This time type could be based on the struct __kernel_timespec. modes will use a flag to notify which time structure should be used internally. This needs some application level changes on both 64 bit and 32 bit architectures. Although 64 bit machines could continue to use the older timeval structure without any changes. 2. Add a new u8 type to struct timex that makes use of padded bits. This can be used to save higher order tv_sec bits. modes will use a flag to notify presence of such a type. This will need some application level changes on 32 bit architectures. 3. Add a new compat_timex structure that differs in only the size of the time type; keep rest of struct timex the same. This requires extra syscalls to manage all 3 cases on 64 bit architectures. This will not need any application level changes but will add more complexity from kernel side. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/timex.h | 7 +++++++ include/uapi/linux/timex.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 39c25dbebfe8..7f40e9e42ecc 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,6 +53,13 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H +/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path + * and 32-bit emulation. + */ +#ifndef CONFIG_64BIT_TIME +#define __kernel_timex timex +#endif + #include #define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index 92685d826444..a1c6b73016a5 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -92,6 +92,47 @@ struct timex { int :32; int :32; int :32; }; +struct __kernel_timex_timeval { + __kernel_time64_t tv_sec; + long long tv_usec; +}; + +#ifndef __kernel_timex +struct __kernel_timex { + unsigned int modes; /* mode selector */ + int :32; /* pad */ + long long offset; /* time offset (usec) */ + long long freq; /* frequency offset (scaled ppm) */ + long long maxerror;/* maximum error (usec) */ + long long esterror;/* estimated error (usec) */ + int status; /* clock command/status */ + int :32; /* pad */ + long long constant;/* pll time constant */ + long long precision;/* clock precision (usec) (read only) */ + long long tolerance;/* clock frequency tolerance (ppm) + * (read only) + */ + struct __kernel_timex_timeval time; /* (read only, except for ADJ_SETOFFSET) */ + long long tick; /* (modified) usecs between clock ticks */ + + long long ppsfreq;/* pps frequency (scaled ppm) (ro) */ + long long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + int :32; /* pad */ + long long stabil; /* pps stability (scaled ppm) (ro) */ + long long jitcnt; /* jitter limit exceeded (ro) */ + long long calcnt; /* calibration intervals (ro) */ + long long errcnt; /* calibration errors (ro) */ + long long stbcnt; /* stability limit exceeded (ro) */ + + int tai; /* TAI offset (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; +}; +#endif + /* * Mode codes (timex.mode) */ -- cgit v1.2.3 From 50b93f30f6d8672f9ec80e90af94d733f11a20e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 1 Jan 2019 17:34:39 +0100 Subject: time: fix sys_timer_settime prototype A small typo has crept into the y2038 conversion of the timer_settime system call. So far this was completely harmless, but once we start using the new version, this has to be fixed. Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec") Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 938d8908b9e0..baa4b70b02d3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id, asmlinkage long sys_timer_getoverrun(timer_t timer_id); asmlinkage long sys_timer_settime(timer_t timer_id, int flags, const struct __kernel_itimerspec __user *new_setting, - struct itimerspec __user *old_setting); + struct __kernel_itimerspec __user *old_setting); asmlinkage long sys_timer_delete(timer_t timer_id); asmlinkage long sys_clock_settime(clockid_t which_clock, const struct __kernel_timespec __user *tp); -- cgit v1.2.3 From 1a596398a3d75f966b75f428e992cf1f242f9a5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Jan 2019 21:12:39 +0100 Subject: sparc64: add custom adjtimex/clock_adjtime functions sparc64 is the only architecture on Linux that has a 'timeval' definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes problems for sparc32 compat mode when we convert it to use the new __kernel_timex type that has the same layout as all other 64-bit architectures. To avoid adding sparc64 specific code into the generic adjtimex implementation, this adds a wrapper in the sparc64 system call handling that converts the sparc64 'timex' into the new '__kernel_timex'. At this point, the two structures are defined to be identical, but that will change in the next step once we convert sparc32. Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 59 +++++++++++++++++++++++++++++++++- arch/sparc/kernel/syscalls/syscall.tbl | 6 ++-- include/linux/timex.h | 2 ++ kernel/time/posix-timers.c | 24 +++++++------- 4 files changed, 76 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 1c079e7bab09..37de18a11207 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -28,8 +28,9 @@ #include #include #include - +#include #include + #include #include @@ -544,6 +545,62 @@ out_unlock: return err; } +SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *)&txc; + int ret; + + /* Copy the user data space into the kernel copy + * structure. But bear in mind that the structures + * may change + */ + if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* + * override for sparc64 specific timeval type: tv_usec + * is 32 bit wide instead of 64-bit in __kernel_timex + */ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_adjtimex(kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; +} + +SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) +{ + struct timex txc; /* Local copy of parameter */ + struct timex *kt = (void *)&txc; + int ret; + + if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { + pr_err_once("process %d (%s) attempted a POSIX timer syscall " + "while CONFIG_POSIX_TIMERS is not set\n", + current->pid, current->comm); + + return -ENOSYS; + } + + /* Copy the user data space into the kernel copy + * structure. But bear in mind that the structures + * may change + */ + if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; + + /* + * override for sparc64 specific timeval type: tv_usec + * is 32 bit wide instead of 64-bit in __kernel_timex + */ + kt->time.tv_usec = txc.time.tv_usec; + ret = do_clock_adjtime(which_clock, kt); + txc.time.tv_usec = kt->time.tv_usec; + + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; +} + SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, utrap_handler_t, new_p, utrap_handler_t, new_d, utrap_handler_t __user *, old_p, diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 6992d17cce37..e63cd013cc77 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -258,7 +258,8 @@ 216 64 sigreturn sys_nis_syscall 217 common clone sys_clone 218 common ioprio_get sys_ioprio_get -219 common adjtimex sys_adjtimex compat_sys_adjtimex +219 32 adjtimex sys_adjtimex compat_sys_adjtimex +219 64 adjtimex sys_sparc_adjtimex 220 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 220 64 sigprocmask sys_nis_syscall 221 common create_module sys_ni_syscall @@ -377,7 +378,8 @@ 331 common prlimit64 sys_prlimit64 332 common name_to_handle_at sys_name_to_handle_at 333 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 64 clock_adjtime sys_sparc_clock_adjtime 335 common syncfs sys_syncfs 336 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 337 common setns sys_setns diff --git a/include/linux/timex.h b/include/linux/timex.h index 7f40e9e42ecc..a15e6aeb8d49 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -159,6 +159,8 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) extern int do_adjtimex(struct timex *); +extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx); + extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8955f32f2a36..8f7f1dd95940 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1047,22 +1047,28 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, return error; } -SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) +int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timex ktx; - int err; if (!kc) return -EINVAL; if (!kc->clock_adj) return -EOPNOTSUPP; + return kc->clock_adj(which_clock, ktx); +} + +SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, + struct timex __user *, utx) +{ + struct timex ktx; + int err; + if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT; - err = kc->clock_adj(which_clock, &ktx); + err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; @@ -1126,20 +1132,14 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, struct old_timex32 __user *, utp) { - const struct k_clock *kc = clockid_to_kclock(which_clock); struct timex ktx; int err; - if (!kc) - return -EINVAL; - if (!kc->clock_adj) - return -EOPNOTSUPP; - err = get_old_timex32(&ktx, utp); if (err) return err; - err = kc->clock_adj(which_clock, &ktx); + err = do_clock_adjtime(which_clock, &ktx); if (err >= 0) err = put_old_timex32(utp, &ktx); -- cgit v1.2.3 From ead25417f82ed7f8a21da4dcefc768169f7da884 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:21 -0700 Subject: timex: use __kernel_timex internally struct timex is not y2038 safe. Replace all uses of timex with y2038 safe __kernel_timex. Note that struct __kernel_timex is an ABI interface definition. We could define a new structure based on __kernel_timex that is only available internally instead. Right now, there isn't a strong motivation for this as the structure is isolated to a few defined struct timex interfaces and such a structure would be exactly the same as struct timex. The patch was generated by the following coccinelle script: virtual patch @depends on patch forall@ identifier ts; expression e; @@ ( - struct timex ts; + struct __kernel_timex ts; | - struct timex ts = {}; + struct __kernel_timex ts = {}; | - struct timex ts = e; + struct __kernel_timex ts = e; | - struct timex *ts; + struct __kernel_timex *ts; | (memset \| copy_from_user \| copy_to_user \)(..., - sizeof(struct timex)) + sizeof(struct __kernel_timex)) ) @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts, + struct __kernel_timex *ts, ...) { ... } @depends on patch forall@ identifier ts; identifier fn; @@ fn(..., - struct timex *ts) { + struct __kernel_timex *ts) { ... } Signed-off-by: Deepa Dinamani Cc: linux-alpha@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/osf_sys.c | 5 +++-- arch/sparc/kernel/sys_sparc_64.c | 4 ++-- drivers/ptp/ptp_clock.c | 2 +- include/linux/posix-clock.h | 2 +- include/linux/time32.h | 6 +++--- include/linux/timex.h | 4 ++-- kernel/time/ntp.c | 18 ++++++++++-------- kernel/time/ntp_internal.h | 2 +- kernel/time/posix-clock.c | 2 +- kernel/time/posix-timers.c | 8 ++++---- kernel/time/posix-timers.h | 2 +- kernel/time/time.c | 14 +++++++------- kernel/time/timekeeping.c | 4 ++-- 13 files changed, 38 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 792586038808..bf497b8b0ec6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1253,7 +1253,7 @@ struct timex32 { SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) { - struct timex txc; + struct __kernel_timex txc; int ret; /* copy relevant bits of struct timex. */ @@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) || (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - offsetof(struct timex32, tick))) || - (put_tv_to_tv32(&txc_p->time, &txc.time))) + (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) || + (put_user(txc.time.tv_usec, &txc_p->time.tv_usec))) return -EFAULT; return ret; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 37de18a11207..9825ca6a6020 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -548,7 +548,7 @@ out_unlock: SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; /* Copy the user data space into the kernel copy @@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ - struct timex *kt = (void *)&txc; + struct __kernel_timex *kt = (void *)&txc; int ret; if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 48f3594a7458..79bd102c9bbc 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) return err; } -static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops; diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 3a3bc71017d5..18674d7d5b1c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -51,7 +51,7 @@ struct posix_clock; struct posix_clock_operations { struct module *owner; - int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); + int (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex *tx); int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); diff --git a/include/linux/time32.h b/include/linux/time32.h index 820a22e2b98b..0a1f302a1753 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -69,9 +69,9 @@ extern int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits); extern int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits); -struct timex; -int get_old_timex32(struct timex *, const struct old_timex32 __user *); -int put_old_timex32(struct old_timex32 __user *, const struct timex *); +struct __kernel_timex; +int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *); +int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *); #if __BITS_PER_LONG == 64 diff --git a/include/linux/timex.h b/include/linux/timex.h index a15e6aeb8d49..4aff9f0d1367 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -158,8 +158,8 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_FREQ (HZ) #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) -extern int do_adjtimex(struct timex *); -extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx); +extern int do_adjtimex(struct __kernel_timex *); +extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx); extern void hardpps(const struct timespec64 *, const struct timespec64 *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 36a2bef00125..92a90014a925 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -188,13 +188,13 @@ static inline int is_error_status(int status) && (status & (STA_PPSWANDER|STA_PPSERROR))); } -static inline void pps_fill_timex(struct timex *txc) +static inline void pps_fill_timex(struct __kernel_timex *txc) { txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) * PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->jitter = pps_jitter; if (!(time_status & STA_NANO)) - txc->jitter /= NSEC_PER_USEC; + txc->jitter = pps_jitter / NSEC_PER_USEC; txc->shift = pps_shift; txc->stabil = pps_stabil; txc->jitcnt = pps_jitcnt; @@ -220,7 +220,7 @@ static inline int is_error_status(int status) return status & (STA_UNSYNC|STA_CLOCKERR); } -static inline void pps_fill_timex(struct timex *txc) +static inline void pps_fill_timex(struct __kernel_timex *txc) { /* PPS is not implemented, so these are zero */ txc->ppsfreq = 0; @@ -633,7 +633,7 @@ void ntp_notify_cmos_timer(void) /* * Propagate a new txc->status value into the NTP state: */ -static inline void process_adj_status(const struct timex *txc) +static inline void process_adj_status(const struct __kernel_timex *txc) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; @@ -656,7 +656,8 @@ static inline void process_adj_status(const struct timex *txc) } -static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai) +static inline void process_adjtimex_modes(const struct __kernel_timex *txc, + s32 *time_tai) { if (txc->modes & ADJ_STATUS) process_adj_status(txc); @@ -707,7 +708,8 @@ static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ -int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) +int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai) { int result; @@ -729,7 +731,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); if (!(time_status & STA_NANO)) - txc->offset /= NSEC_PER_USEC; + txc->offset = (u32)txc->offset / NSEC_PER_USEC; } result = time_state; /* mostly `TIME_OK' */ @@ -754,7 +756,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai) txc->time.tv_sec = (time_t)ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) - txc->time.tv_usec /= NSEC_PER_USEC; + txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; /* Handle leapsec adjustments */ if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) { diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index c24b0e13f011..40e6122e634e 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,6 +8,6 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); extern int second_overflow(time64_t secs); -extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai); +extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 425bbfce6819..ec960bb939fd 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -228,7 +228,7 @@ static void put_clock_desc(struct posix_clock_desc *cd) fput(cd->fp); } -static int pc_clock_adjtime(clockid_t id, struct timex *tx) +static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx) { struct posix_clock_desc cd; int err; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8f7f1dd95940..2d84b3db1ade 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -179,7 +179,7 @@ static int posix_clock_realtime_set(const clockid_t which_clock, } static int posix_clock_realtime_adj(const clockid_t which_clock, - struct timex *t) + struct __kernel_timex *t) { return do_adjtimex(t); } @@ -1047,7 +1047,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, return error; } -int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) +int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); @@ -1062,7 +1062,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx) SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct timex __user *, utx) { - struct timex ktx; + struct __kernel_timex ktx; int err; if (copy_from_user(&ktx, utx, sizeof(ktx))) @@ -1132,7 +1132,7 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, struct old_timex32 __user *, utp) { - struct timex ktx; + struct __kernel_timex ktx; int err; err = get_old_timex32(&ktx, utp); diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index ddb21145211a..de5daa6d975a 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -8,7 +8,7 @@ struct k_clock { const struct timespec64 *tp); int (*clock_get)(const clockid_t which_clock, struct timespec64 *tp); - int (*clock_adj)(const clockid_t which_clock, struct timex *tx); + int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx); int (*timer_create)(struct k_itimer *timer); int (*nsleep)(const clockid_t which_clock, int flags, const struct timespec64 *); diff --git a/kernel/time/time.c b/kernel/time/time.c index 2d013bc2b271..d179d33f639a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -265,25 +265,25 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { - struct timex txc; /* Local copy of parameter */ + struct __kernel_timex txc; /* Local copy of parameter */ int ret; /* Copy the user data space into the kernel copy * structure. But bear in mind that the structures * may change */ - if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + if (copy_from_user(&txc, txc_p, sizeof(struct __kernel_timex))) return -EFAULT; ret = do_adjtimex(&txc); - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; + return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret; } #ifdef CONFIG_COMPAT_32BIT_TIME -int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) +int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) { struct old_timex32 tx32; - memset(txc, 0, sizeof(struct timex)); + memset(txc, 0, sizeof(struct __kernel_timex)); if (copy_from_user(&tx32, utp, sizeof(struct old_timex32))) return -EFAULT; @@ -311,7 +311,7 @@ int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp) return 0; } -int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) +int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex *txc) { struct old_timex32 tx32; @@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc) COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) { - struct timex txc; + struct __kernel_timex txc; int err, ret; err = get_old_timex32(&txc, utp); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ac5dbf2cd4a2..f986e1918d12 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2234,7 +2234,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, /** * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ -static int timekeeping_validate_timex(const struct timex *txc) +static int timekeeping_validate_timex(const struct __kernel_timex *txc) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ @@ -2300,7 +2300,7 @@ static int timekeeping_validate_timex(const struct timex *txc) /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ -int do_adjtimex(struct timex *txc) +int do_adjtimex(struct __kernel_timex *txc) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; -- cgit v1.2.3 From 3876ced476c8ec17265d1739467e726ada88b660 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 2 Jul 2018 22:44:22 -0700 Subject: timex: change syscalls to use struct __kernel_timex struct timex is not y2038 safe. Switch all the syscall apis to use y2038 safe __kernel_timex. Note that sys_adjtimex() does not have a y2038 safe solution. C libraries can implement it by calling clock_adjtime(CLOCK_REALTIME, ...). Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 6 +++--- kernel/time/posix-timers.c | 2 +- kernel/time/time.c | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index baa4b70b02d3..09330d5bda0c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct __sysctl_args; struct sysinfo; struct timespec; struct timeval; -struct timex; +struct __kernel_timex; struct timezone; struct tms; struct utimbuf; @@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); +asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); + struct __kernel_timex __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d84b3db1ade..de79f85ae14f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, - struct timex __user *, utx) + struct __kernel_timex __user *, utx) { struct __kernel_timex ktx; int err; diff --git a/kernel/time/time.c b/kernel/time/time.c index d179d33f639a..78b5c8f1495a 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, } #endif -SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) +#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ int ret; @@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) ret = do_adjtimex(&txc); return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret; } +#endif #ifdef CONFIG_COMPAT_32BIT_TIME int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) -- cgit v1.2.3 From 8dabe7245bbc134f2cfcc12cde75c019dab924cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Jan 2019 00:33:08 +0100 Subject: y2038: syscalls: rename y2038 compat syscalls A lot of system calls that pass a time_t somewhere have an implementation using a COMPAT_SYSCALL_DEFINEx() on 64-bit architectures, and have been reworked so that this implementation can now be used on 32-bit architectures as well. The missing step is to redefine them using the regular SYSCALL_DEFINEx() to get them out of the compat namespace and make it possible to build them on 32-bit architectures. Any system call that ends in 'time' gets a '32' suffix on its name for that version, while the others get a '_time32' suffix, to distinguish them from the normal version, which takes a 64-bit time argument in the future. In this step, only 64-bit architectures are changed, doing this rename first lets us avoid touching the 32-bit architectures twice. Acked-by: Catalin Marinas Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 48 ++++++++++---------- arch/mips/kernel/syscalls/syscall_n32.tbl | 50 ++++++++++----------- arch/mips/kernel/syscalls/syscall_o32.tbl | 52 +++++++++++----------- arch/parisc/kernel/syscalls/syscall.tbl | 54 +++++++++++------------ arch/powerpc/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/s390/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/sparc/kernel/syscalls/syscall.tbl | 52 +++++++++++----------- arch/x86/entry/syscalls/syscall_32.tbl | 52 +++++++++++----------- fs/aio.c | 10 ++--- fs/select.c | 4 +- fs/timerfd.c | 4 +- fs/utimes.c | 10 ++--- include/linux/compat.h | 73 ++----------------------------- include/linux/syscalls.h | 57 ++++++++++++++++++++++++ include/uapi/asm-generic/unistd.h | 44 +++++++++---------- ipc/mqueue.c | 16 +++---- ipc/sem.c | 2 +- kernel/futex.c | 2 +- kernel/sched/core.c | 5 +-- kernel/signal.c | 2 +- kernel/sys_ni.c | 18 ++++---- kernel/time/hrtimer.c | 2 +- kernel/time/posix-stubs.c | 25 ++++++----- kernel/time/posix-timers.c | 32 +++++++------- kernel/time/time.c | 8 ++-- net/compat.c | 2 +- 26 files changed, 361 insertions(+), 367 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index d10cce69a4b0..1ded82857161 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -270,7 +270,7 @@ __SYSCALL(__NR_uname, sys_newuname) /* 123 was sys_modify_ldt */ __SYSCALL(123, sys_ni_syscall) #define __NR_adjtimex 124 -__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +__SYSCALL(__NR_adjtimex, sys_adjtimex_time32) #define __NR_mprotect 125 __SYSCALL(__NR_mprotect, sys_mprotect) #define __NR_sigprocmask 126 @@ -344,9 +344,9 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 160 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 161 -__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32) #define __NR_nanosleep 162 -__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +__SYSCALL(__NR_nanosleep, sys_nanosleep_time32) #define __NR_mremap 163 __SYSCALL(__NR_mremap, sys_mremap) #define __NR_setresuid 164 @@ -376,7 +376,7 @@ __SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 176 __SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) #define __NR_rt_sigtimedwait 177 -__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) #define __NR_rt_sigqueueinfo 178 __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) #define __NR_rt_sigsuspend 179 @@ -502,7 +502,7 @@ __SYSCALL(__NR_tkill, sys_tkill) #define __NR_sendfile64 239 __SYSCALL(__NR_sendfile64, sys_sendfile64) #define __NR_futex 240 -__SYSCALL(__NR_futex, compat_sys_futex) +__SYSCALL(__NR_futex, sys_futex_time32) #define __NR_sched_setaffinity 241 __SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) #define __NR_sched_getaffinity 242 @@ -512,7 +512,7 @@ __SYSCALL(__NR_io_setup, compat_sys_io_setup) #define __NR_io_destroy 244 __SYSCALL(__NR_io_destroy, sys_io_destroy) #define __NR_io_getevents 245 -__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +__SYSCALL(__NR_io_getevents, sys_io_getevents_time32) #define __NR_io_submit 246 __SYSCALL(__NR_io_submit, compat_sys_io_submit) #define __NR_io_cancel 247 @@ -538,21 +538,21 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address) #define __NR_timer_create 257 __SYSCALL(__NR_timer_create, compat_sys_timer_create) #define __NR_timer_settime 258 -__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +__SYSCALL(__NR_timer_settime, sys_timer_settime32) #define __NR_timer_gettime 259 -__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +__SYSCALL(__NR_timer_gettime, sys_timer_gettime32) #define __NR_timer_getoverrun 260 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_delete 261 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 262 -__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +__SYSCALL(__NR_clock_settime, sys_clock_settime32) #define __NR_clock_gettime 263 -__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +__SYSCALL(__NR_clock_gettime, sys_clock_gettime32) #define __NR_clock_getres 264 -__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +__SYSCALL(__NR_clock_getres, sys_clock_getres_time32) #define __NR_clock_nanosleep 265 -__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32) #define __NR_statfs64 266 __SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) #define __NR_fstatfs64 267 @@ -560,7 +560,7 @@ __SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) #define __NR_tgkill 268 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 269 -__SYSCALL(__NR_utimes, compat_sys_utimes) +__SYSCALL(__NR_utimes, sys_utimes_time32) #define __NR_arm_fadvise64_64 270 __SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) #define __NR_pciconfig_iobase 271 @@ -574,9 +574,9 @@ __SYSCALL(__NR_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 275 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 276 -__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32) #define __NR_mq_timedreceive 277 -__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32) #define __NR_mq_notify 278 __SYSCALL(__NR_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 279 @@ -646,7 +646,7 @@ __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 311 __SYSCALL(__NR_keyctl, compat_sys_keyctl) #define __NR_semtimedop 312 -__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +__SYSCALL(__NR_semtimedop, sys_semtimedop_time32) #define __NR_vserver 313 __SYSCALL(__NR_vserver, sys_ni_syscall) #define __NR_ioprio_set 314 @@ -674,7 +674,7 @@ __SYSCALL(__NR_mknodat, sys_mknodat) #define __NR_fchownat 325 __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_futimesat 326 -__SYSCALL(__NR_futimesat, compat_sys_futimesat) +__SYSCALL(__NR_futimesat, sys_futimesat_time32) #define __NR_fstatat64 327 __SYSCALL(__NR_fstatat64, sys_fstatat64) #define __NR_unlinkat 328 @@ -692,9 +692,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat) #define __NR_faccessat 334 __SYSCALL(__NR_faccessat, sys_faccessat) #define __NR_pselect6 335 -__SYSCALL(__NR_pselect6, compat_sys_pselect6) +__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 336 -__SYSCALL(__NR_ppoll, compat_sys_ppoll) +__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32) #define __NR_unshare 337 __SYSCALL(__NR_unshare, sys_unshare) #define __NR_set_robust_list 338 @@ -718,7 +718,7 @@ __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) #define __NR_kexec_load 347 __SYSCALL(__NR_kexec_load, compat_sys_kexec_load) #define __NR_utimensat 348 -__SYSCALL(__NR_utimensat, compat_sys_utimensat) +__SYSCALL(__NR_utimensat, sys_utimensat_time32) #define __NR_signalfd 349 __SYSCALL(__NR_signalfd, compat_sys_signalfd) #define __NR_timerfd_create 350 @@ -728,9 +728,9 @@ __SYSCALL(__NR_eventfd, sys_eventfd) #define __NR_fallocate 352 __SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) #define __NR_timerfd_settime 353 -__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32) #define __NR_timerfd_gettime 354 -__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32) #define __NR_signalfd4 355 __SYSCALL(__NR_signalfd4, compat_sys_signalfd4) #define __NR_eventfd2 356 @@ -752,7 +752,7 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 364 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_recvmmsg 365 -__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32) #define __NR_accept4 366 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_fanotify_init 367 @@ -766,7 +766,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 371 __SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) #define __NR_clock_adjtime 372 -__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32) #define __NR_syncfs 373 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_sendmmsg 374 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index cc134b1211aa..6d1e019817c8 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -41,7 +41,7 @@ 31 n32 dup sys_dup 32 n32 dup2 sys_dup2 33 n32 pause sys_pause -34 n32 nanosleep compat_sys_nanosleep +34 n32 nanosleep sys_nanosleep_time32 35 n32 getitimer compat_sys_getitimer 36 n32 setitimer compat_sys_setitimer 37 n32 alarm sys_alarm @@ -133,11 +133,11 @@ 123 n32 capget sys_capget 124 n32 capset sys_capset 125 n32 rt_sigpending compat_sys_rt_sigpending -126 n32 rt_sigtimedwait compat_sys_rt_sigtimedwait +126 n32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 127 n32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo 128 n32 rt_sigsuspend compat_sys_rt_sigsuspend 129 n32 sigaltstack compat_sys_sigaltstack -130 n32 utime compat_sys_utime +130 n32 utime sys_utime32 131 n32 mknod sys_mknod 132 n32 personality sys_32_personality 133 n32 ustat compat_sys_ustat @@ -152,7 +152,7 @@ 142 n32 sched_getscheduler sys_sched_getscheduler 143 n32 sched_get_priority_max sys_sched_get_priority_max 144 n32 sched_get_priority_min sys_sched_get_priority_min -145 n32 sched_rr_get_interval compat_sys_sched_rr_get_interval +145 n32 sched_rr_get_interval sys_sched_rr_get_interval_time32 146 n32 mlock sys_mlock 147 n32 munlock sys_munlock 148 n32 mlockall sys_mlockall @@ -161,7 +161,7 @@ 151 n32 pivot_root sys_pivot_root 152 n32 _sysctl compat_sys_sysctl 153 n32 prctl sys_prctl -154 n32 adjtimex compat_sys_adjtimex +154 n32 adjtimex sys_adjtimex_time32 155 n32 setrlimit compat_sys_setrlimit 156 n32 chroot sys_chroot 157 n32 sync sys_sync @@ -202,7 +202,7 @@ 191 n32 fremovexattr sys_fremovexattr 192 n32 tkill sys_tkill 193 n32 reserved193 sys_ni_syscall -194 n32 futex compat_sys_futex +194 n32 futex sys_futex_time32 195 n32 sched_setaffinity compat_sys_sched_setaffinity 196 n32 sched_getaffinity compat_sys_sched_getaffinity 197 n32 cacheflush sys_cacheflush @@ -210,7 +210,7 @@ 199 n32 sysmips __sys_sysmips 200 n32 io_setup compat_sys_io_setup 201 n32 io_destroy sys_io_destroy -202 n32 io_getevents compat_sys_io_getevents +202 n32 io_getevents sys_io_getevents_time32 203 n32 io_submit compat_sys_io_submit 204 n32 io_cancel sys_io_cancel 205 n32 exit_group sys_exit_group @@ -223,29 +223,29 @@ 212 n32 fcntl64 compat_sys_fcntl64 213 n32 set_tid_address sys_set_tid_address 214 n32 restart_syscall sys_restart_syscall -215 n32 semtimedop compat_sys_semtimedop +215 n32 semtimedop sys_semtimedop_time32 216 n32 fadvise64 sys_fadvise64_64 217 n32 statfs64 compat_sys_statfs64 218 n32 fstatfs64 compat_sys_fstatfs64 219 n32 sendfile64 sys_sendfile64 220 n32 timer_create compat_sys_timer_create -221 n32 timer_settime compat_sys_timer_settime -222 n32 timer_gettime compat_sys_timer_gettime +221 n32 timer_settime sys_timer_settime32 +222 n32 timer_gettime sys_timer_gettime32 223 n32 timer_getoverrun sys_timer_getoverrun 224 n32 timer_delete sys_timer_delete -225 n32 clock_settime compat_sys_clock_settime -226 n32 clock_gettime compat_sys_clock_gettime -227 n32 clock_getres compat_sys_clock_getres -228 n32 clock_nanosleep compat_sys_clock_nanosleep +225 n32 clock_settime sys_clock_settime32 +226 n32 clock_gettime sys_clock_gettime32 +227 n32 clock_getres sys_clock_getres_time32 +228 n32 clock_nanosleep sys_clock_nanosleep_time32 229 n32 tgkill sys_tgkill -230 n32 utimes compat_sys_utimes +230 n32 utimes sys_utimes_time32 231 n32 mbind compat_sys_mbind 232 n32 get_mempolicy compat_sys_get_mempolicy 233 n32 set_mempolicy compat_sys_set_mempolicy 234 n32 mq_open compat_sys_mq_open 235 n32 mq_unlink sys_mq_unlink -236 n32 mq_timedsend compat_sys_mq_timedsend -237 n32 mq_timedreceive compat_sys_mq_timedreceive +236 n32 mq_timedsend sys_mq_timedsend_time32 +237 n32 mq_timedreceive sys_mq_timedreceive_time32 238 n32 mq_notify compat_sys_mq_notify 239 n32 mq_getsetattr compat_sys_mq_getsetattr 240 n32 vserver sys_ni_syscall @@ -263,7 +263,7 @@ 252 n32 mkdirat sys_mkdirat 253 n32 mknodat sys_mknodat 254 n32 fchownat sys_fchownat -255 n32 futimesat compat_sys_futimesat +255 n32 futimesat sys_futimesat_time32 256 n32 newfstatat sys_newfstatat 257 n32 unlinkat sys_unlinkat 258 n32 renameat sys_renameat @@ -272,8 +272,8 @@ 261 n32 readlinkat sys_readlinkat 262 n32 fchmodat sys_fchmodat 263 n32 faccessat sys_faccessat -264 n32 pselect6 compat_sys_pselect6 -265 n32 ppoll compat_sys_ppoll +264 n32 pselect6 compat_sys_pselect6_time32 +265 n32 ppoll compat_sys_ppoll_time32 266 n32 unshare sys_unshare 267 n32 splice sys_splice 268 n32 sync_file_range sys_sync_file_range @@ -287,14 +287,14 @@ 276 n32 epoll_pwait compat_sys_epoll_pwait 277 n32 ioprio_set sys_ioprio_set 278 n32 ioprio_get sys_ioprio_get -279 n32 utimensat compat_sys_utimensat +279 n32 utimensat sys_utimensat_time32 280 n32 signalfd compat_sys_signalfd 281 n32 timerfd sys_ni_syscall 282 n32 eventfd sys_eventfd 283 n32 fallocate sys_fallocate 284 n32 timerfd_create sys_timerfd_create -285 n32 timerfd_gettime compat_sys_timerfd_gettime -286 n32 timerfd_settime compat_sys_timerfd_settime +285 n32 timerfd_gettime sys_timerfd_gettime32 +286 n32 timerfd_settime sys_timerfd_settime32 287 n32 signalfd4 compat_sys_signalfd4 288 n32 eventfd2 sys_eventfd2 289 n32 epoll_create1 sys_epoll_create1 @@ -306,14 +306,14 @@ 295 n32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 296 n32 perf_event_open sys_perf_event_open 297 n32 accept4 sys_accept4 -298 n32 recvmmsg compat_sys_recvmmsg +298 n32 recvmmsg compat_sys_recvmmsg_time32 299 n32 getdents64 sys_getdents64 300 n32 fanotify_init sys_fanotify_init 301 n32 fanotify_mark sys_fanotify_mark 302 n32 prlimit64 sys_prlimit64 303 n32 name_to_handle_at sys_name_to_handle_at 304 n32 open_by_handle_at sys_open_by_handle_at -305 n32 clock_adjtime compat_sys_clock_adjtime +305 n32 clock_adjtime sys_clock_adjtime32 306 n32 syncfs sys_syncfs 307 n32 sendmmsg compat_sys_sendmmsg 308 n32 setns sys_setns diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index fa47ea8cc6ef..e9fec7bac5a9 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -20,7 +20,7 @@ 10 o32 unlink sys_unlink 11 o32 execve sys_execve compat_sys_execve 12 o32 chdir sys_chdir -13 o32 time sys_time compat_sys_time +13 o32 time sys_time sys_time32 14 o32 mknod sys_mknod 15 o32 chmod sys_chmod 16 o32 lchown sys_lchown @@ -33,13 +33,13 @@ 22 o32 umount sys_oldumount 23 o32 setuid sys_setuid 24 o32 getuid sys_getuid -25 o32 stime sys_stime compat_sys_stime +25 o32 stime sys_stime sys_stime32 26 o32 ptrace sys_ptrace compat_sys_ptrace 27 o32 alarm sys_alarm # 28 was sys_fstat 28 o32 unused28 sys_ni_syscall 29 o32 pause sys_pause -30 o32 utime sys_utime compat_sys_utime +30 o32 utime sys_utime sys_utime32 31 o32 stty sys_ni_syscall 32 o32 gtty sys_ni_syscall 33 o32 access sys_access @@ -135,7 +135,7 @@ 121 o32 setdomainname sys_setdomainname 122 o32 uname sys_newuname 123 o32 modify_ldt sys_ni_syscall -124 o32 adjtimex sys_adjtimex compat_sys_adjtimex +124 o32 adjtimex sys_adjtimex sys_adjtimex_time32 125 o32 mprotect sys_mprotect 126 o32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 o32 create_module sys_ni_syscall @@ -176,8 +176,8 @@ 162 o32 sched_yield sys_sched_yield 163 o32 sched_get_priority_max sys_sched_get_priority_max 164 o32 sched_get_priority_min sys_sched_get_priority_min -165 o32 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -166 o32 nanosleep sys_nanosleep compat_sys_nanosleep +165 o32 sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +166 o32 nanosleep sys_nanosleep sys_nanosleep_time32 167 o32 mremap sys_mremap 168 o32 accept sys_accept 169 o32 bind sys_bind @@ -208,7 +208,7 @@ 194 o32 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 195 o32 rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 196 o32 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -197 o32 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +197 o32 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 198 o32 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 199 o32 rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 200 o32 pread64 sys_pread64 sys_32_pread @@ -249,12 +249,12 @@ 235 o32 fremovexattr sys_fremovexattr 236 o32 tkill sys_tkill 237 o32 sendfile64 sys_sendfile64 -238 o32 futex sys_futex compat_sys_futex +238 o32 futex sys_futex sys_futex_time32 239 o32 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 o32 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 o32 io_setup sys_io_setup compat_sys_io_setup 242 o32 io_destroy sys_io_destroy -243 o32 io_getevents sys_io_getevents compat_sys_io_getevents +243 o32 io_getevents sys_io_getevents sys_io_getevents_time32 244 o32 io_submit sys_io_submit compat_sys_io_submit 245 o32 io_cancel sys_io_cancel 246 o32 exit_group sys_exit_group @@ -269,23 +269,23 @@ 255 o32 statfs64 sys_statfs64 compat_sys_statfs64 256 o32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 257 o32 timer_create sys_timer_create compat_sys_timer_create -258 o32 timer_settime sys_timer_settime compat_sys_timer_settime -259 o32 timer_gettime sys_timer_gettime compat_sys_timer_gettime +258 o32 timer_settime sys_timer_settime sys_timer_settime32 +259 o32 timer_gettime sys_timer_gettime sys_timer_gettime32 260 o32 timer_getoverrun sys_timer_getoverrun 261 o32 timer_delete sys_timer_delete -262 o32 clock_settime sys_clock_settime compat_sys_clock_settime -263 o32 clock_gettime sys_clock_gettime compat_sys_clock_gettime -264 o32 clock_getres sys_clock_getres compat_sys_clock_getres -265 o32 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +262 o32 clock_settime sys_clock_settime sys_clock_settime32 +263 o32 clock_gettime sys_clock_gettime sys_clock_gettime32 +264 o32 clock_getres sys_clock_getres sys_clock_getres_time32 +265 o32 clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 266 o32 tgkill sys_tgkill -267 o32 utimes sys_utimes compat_sys_utimes +267 o32 utimes sys_utimes sys_utimes_time32 268 o32 mbind sys_mbind compat_sys_mbind 269 o32 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 o32 set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 o32 mq_open sys_mq_open compat_sys_mq_open 272 o32 mq_unlink sys_mq_unlink -273 o32 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 o32 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +273 o32 mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 o32 mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 o32 mq_notify sys_mq_notify compat_sys_mq_notify 276 o32 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 o32 vserver sys_ni_syscall @@ -303,7 +303,7 @@ 289 o32 mkdirat sys_mkdirat 290 o32 mknodat sys_mknodat 291 o32 fchownat sys_fchownat -292 o32 futimesat sys_futimesat compat_sys_futimesat +292 o32 futimesat sys_futimesat sys_futimesat_time32 293 o32 fstatat64 sys_fstatat64 sys_newfstatat 294 o32 unlinkat sys_unlinkat 295 o32 renameat sys_renameat @@ -312,8 +312,8 @@ 298 o32 readlinkat sys_readlinkat 299 o32 fchmodat sys_fchmodat 300 o32 faccessat sys_faccessat -301 o32 pselect6 sys_pselect6 compat_sys_pselect6 -302 o32 ppoll sys_ppoll compat_sys_ppoll +301 o32 pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 o32 ppoll sys_ppoll compat_sys_ppoll_time32 303 o32 unshare sys_unshare 304 o32 splice sys_splice 305 o32 sync_file_range sys_sync_file_range sys32_sync_file_range @@ -327,14 +327,14 @@ 313 o32 epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 314 o32 ioprio_set sys_ioprio_set 315 o32 ioprio_get sys_ioprio_get -316 o32 utimensat sys_utimensat compat_sys_utimensat +316 o32 utimensat sys_utimensat sys_utimensat_time32 317 o32 signalfd sys_signalfd compat_sys_signalfd 318 o32 timerfd sys_ni_syscall 319 o32 eventfd sys_eventfd 320 o32 fallocate sys_fallocate sys32_fallocate 321 o32 timerfd_create sys_timerfd_create -322 o32 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime -323 o32 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime +322 o32 timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 +323 o32 timerfd_settime sys_timerfd_settime sys_timerfd_settime32 324 o32 signalfd4 sys_signalfd4 compat_sys_signalfd4 325 o32 eventfd2 sys_eventfd2 326 o32 epoll_create1 sys_epoll_create1 @@ -346,13 +346,13 @@ 332 o32 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 333 o32 perf_event_open sys_perf_event_open 334 o32 accept4 sys_accept4 -335 o32 recvmmsg sys_recvmmsg compat_sys_recvmmsg +335 o32 recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 336 o32 fanotify_init sys_fanotify_init 337 o32 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 338 o32 prlimit64 sys_prlimit64 339 o32 name_to_handle_at sys_name_to_handle_at 340 o32 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -341 o32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +341 o32 clock_adjtime sys_clock_adjtime sys_clock_adjtime32 342 o32 syncfs sys_syncfs 343 o32 sendmmsg sys_sendmmsg compat_sys_sendmmsg 344 o32 setns sys_setns diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 71873bb72782..f7440427d459 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink 11 common execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common time sys_time compat_sys_time +13 common time sys_time sys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -32,12 +32,12 @@ 22 common bind sys_bind 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 common stime sys_stime sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 common fstat sys_newfstat compat_sys_newfstat 29 common pause sys_pause -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 31 common connect sys_connect 32 common listen sys_listen 33 common access sys_access @@ -133,7 +133,7 @@ 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile 123 common recvfrom sys_recvfrom -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask # 127 was create_module @@ -171,8 +171,8 @@ 158 common sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap 164 common setresuid sys_setresuid 165 common getresuid sys_getresuid @@ -187,7 +187,7 @@ 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common chown sys_chown @@ -223,14 +223,14 @@ 207 64 readahead sys_readahead 208 common tkill sys_tkill 209 common sendfile64 sys_sendfile64 compat_sys_sendfile64 -210 common futex sys_futex compat_sys_futex +210 common futex sys_futex sys_futex_time32 211 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 212 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity # 213 was set_thread_area # 214 was get_thread_area 215 common io_setup sys_io_setup compat_sys_io_setup 216 common io_destroy sys_io_destroy -217 common io_getevents sys_io_getevents compat_sys_io_getevents +217 common io_getevents sys_io_getevents sys_io_getevents_time32 218 common io_submit sys_io_submit compat_sys_io_submit 219 common io_cancel sys_io_cancel # 220 was alloc_hugepages @@ -241,11 +241,11 @@ 225 common epoll_ctl sys_epoll_ctl 226 common epoll_wait sys_epoll_wait 227 common remap_file_pages sys_remap_file_pages -228 common semtimedop sys_semtimedop compat_sys_semtimedop +228 common semtimedop sys_semtimedop sys_semtimedop_time32 229 common mq_open sys_mq_open compat_sys_mq_open 230 common mq_unlink sys_mq_unlink -231 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -232 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +231 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +232 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 233 common mq_notify sys_mq_notify compat_sys_mq_notify 234 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 235 common waitid sys_waitid compat_sys_waitid @@ -265,14 +265,14 @@ 248 common lremovexattr sys_lremovexattr 249 common fremovexattr sys_fremovexattr 250 common timer_create sys_timer_create compat_sys_timer_create -251 common timer_settime sys_timer_settime compat_sys_timer_settime -252 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +251 common timer_settime sys_timer_settime sys_timer_settime32 +252 common timer_gettime sys_timer_gettime sys_timer_gettime32 253 common timer_getoverrun sys_timer_getoverrun 254 common timer_delete sys_timer_delete -255 common clock_settime sys_clock_settime compat_sys_clock_settime -256 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -257 common clock_getres sys_clock_getres compat_sys_clock_getres -258 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +255 common clock_settime sys_clock_settime sys_clock_settime32 +256 common clock_gettime sys_clock_gettime sys_clock_gettime32 +257 common clock_getres sys_clock_getres sys_clock_getres_time32 +258 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 259 common tgkill sys_tgkill 260 common mbind sys_mbind compat_sys_mbind 261 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy @@ -287,13 +287,13 @@ 270 common inotify_add_watch sys_inotify_add_watch 271 common inotify_rm_watch sys_inotify_rm_watch 272 common migrate_pages sys_migrate_pages -273 common pselect6 sys_pselect6 compat_sys_pselect6 -274 common ppoll sys_ppoll compat_sys_ppoll +273 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +274 common ppoll sys_ppoll compat_sys_ppoll_time32 275 common openat sys_openat compat_sys_openat 276 common mkdirat sys_mkdirat 277 common mknodat sys_mknodat 278 common fchownat sys_fchownat -279 common futimesat sys_futimesat compat_sys_futimesat +279 common futimesat sys_futimesat sys_futimesat_time32 280 common fstatat64 sys_fstatat64 281 common unlinkat sys_unlinkat 282 common renameat sys_renameat @@ -316,15 +316,15 @@ 298 common statfs64 sys_statfs64 compat_sys_statfs64 299 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 300 common kexec_load sys_kexec_load compat_sys_kexec_load -301 common utimensat sys_utimensat compat_sys_utimensat +301 common utimensat sys_utimensat sys_utimensat_time32 302 common signalfd sys_signalfd compat_sys_signalfd # 303 was timerfd 304 common eventfd sys_eventfd 305 32 fallocate parisc_fallocate 305 64 fallocate sys_fallocate 306 common timerfd_create sys_timerfd_create -307 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -308 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +307 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +308 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 309 common signalfd4 sys_signalfd4 compat_sys_signalfd4 310 common eventfd2 sys_eventfd2 311 common epoll_create1 sys_epoll_create1 @@ -335,12 +335,12 @@ 316 common pwritev sys_pwritev compat_sys_pwritev 317 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 318 common perf_event_open sys_perf_event_open -319 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +319 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 320 common accept4 sys_accept4 321 common prlimit64 sys_prlimit64 322 common fanotify_init sys_fanotify_init 323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark -324 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +324 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 325 common name_to_handle_at sys_name_to_handle_at 326 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at 327 common syncfs sys_syncfs @@ -352,7 +352,7 @@ 333 common finit_module sys_finit_module 334 common sched_setattr sys_sched_setattr 335 common sched_getattr sys_sched_getattr -336 common utimes sys_utimes compat_sys_utimes +336 common utimes sys_utimes sys_utimes_time32 337 common renameat2 sys_renameat2 338 common seccomp sys_seccomp 339 common getrandom sys_getrandom diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7555874ce39c..86650dcd2185 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink 11 nospu execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common time sys_time compat_sys_time +13 common time sys_time sys_time32 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -36,14 +36,14 @@ 22 spu umount sys_ni_syscall 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 common stime sys_stime sys_stime32 26 nospu ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 32 oldfstat sys_fstat sys_ni_syscall 28 64 oldfstat sys_ni_syscall 28 spu oldfstat sys_ni_syscall 29 nospu pause sys_pause -30 nospu utime sys_utime compat_sys_utime +30 nospu utime sys_utime sys_utime32 31 common stty sys_ni_syscall 32 common gtty sys_ni_syscall 33 common access sys_access @@ -157,7 +157,7 @@ 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect 126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 126 64 sigprocmask sys_ni_syscall @@ -198,8 +198,8 @@ 158 common sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap 164 common setresuid sys_setresuid 165 common getresuid sys_getresuid @@ -213,7 +213,7 @@ 173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 common pread64 sys_pread64 compat_sys_pread64 @@ -260,7 +260,7 @@ 218 common removexattr sys_removexattr 219 common lremovexattr sys_lremovexattr 220 common fremovexattr sys_fremovexattr -221 common futex sys_futex compat_sys_futex +221 common futex sys_futex sys_futex_time32 222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity # 224 unused @@ -268,7 +268,7 @@ 226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64 227 common io_setup sys_io_setup compat_sys_io_setup 228 common io_destroy sys_io_destroy -229 common io_getevents sys_io_getevents compat_sys_io_getevents +229 common io_getevents sys_io_getevents sys_io_getevents_time32 230 common io_submit sys_io_submit compat_sys_io_submit 231 common io_cancel sys_io_cancel 232 nospu set_tid_address sys_set_tid_address @@ -280,19 +280,19 @@ 238 common epoll_wait sys_epoll_wait 239 common remap_file_pages sys_remap_file_pages 240 common timer_create sys_timer_create compat_sys_timer_create -241 common timer_settime sys_timer_settime compat_sys_timer_settime -242 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +241 common timer_settime sys_timer_settime sys_timer_settime32 +242 common timer_gettime sys_timer_gettime sys_timer_gettime32 243 common timer_getoverrun sys_timer_getoverrun 244 common timer_delete sys_timer_delete -245 common clock_settime sys_clock_settime compat_sys_clock_settime -246 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -247 common clock_getres sys_clock_getres compat_sys_clock_getres -248 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +245 common clock_settime sys_clock_settime sys_clock_settime32 +246 common clock_gettime sys_clock_gettime sys_clock_gettime32 +247 common clock_getres sys_clock_getres sys_clock_getres_time32 +248 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 249 32 swapcontext ppc_swapcontext ppc32_swapcontext 249 64 swapcontext ppc64_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill -251 common utimes sys_utimes compat_sys_utimes +251 common utimes sys_utimes sys_utimes_time32 252 common statfs64 sys_statfs64 compat_sys_statfs64 253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 254 32 fadvise64_64 ppc_fadvise64_64 @@ -308,8 +308,8 @@ 261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink -264 nospu mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -265 nospu mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +264 nospu mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +265 nospu mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 266 nospu mq_notify sys_mq_notify compat_sys_mq_notify 267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 268 nospu kexec_load sys_kexec_load compat_sys_kexec_load @@ -324,8 +324,8 @@ 277 nospu inotify_rm_watch sys_inotify_rm_watch 278 nospu spu_run sys_spu_run 279 nospu spu_create sys_spu_create -280 nospu pselect6 sys_pselect6 compat_sys_pselect6 -281 nospu ppoll sys_ppoll compat_sys_ppoll +280 nospu pselect6 sys_pselect6 compat_sys_pselect6_time32 +281 nospu ppoll sys_ppoll compat_sys_ppoll_time32 282 common unshare sys_unshare 283 common splice sys_splice 284 common tee sys_tee @@ -334,7 +334,7 @@ 287 common mkdirat sys_mkdirat 288 common mknodat sys_mknodat 289 common fchownat sys_fchownat -290 common futimesat sys_futimesat compat_sys_futimesat +290 common futimesat sys_futimesat sys_futimesat_time32 291 32 fstatat64 sys_fstatat64 291 64 newfstatat sys_newfstatat 291 spu newfstatat sys_newfstatat @@ -350,15 +350,15 @@ 301 common move_pages sys_move_pages compat_sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -304 common utimensat sys_utimensat compat_sys_utimensat +304 common utimensat sys_utimensat sys_utimensat_time32 305 common signalfd sys_signalfd compat_sys_signalfd 306 common timerfd_create sys_timerfd_create 307 common eventfd sys_eventfd 308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2 309 nospu fallocate sys_fallocate compat_sys_fallocate 310 nospu subpage_prot sys_subpage_prot -311 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -312 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +311 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +312 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 313 common signalfd4 sys_signalfd4 compat_sys_signalfd4 314 common eventfd2 sys_eventfd2 315 common epoll_create1 sys_epoll_create1 @@ -389,11 +389,11 @@ 340 common getsockopt sys_getsockopt compat_sys_getsockopt 341 common sendmsg sys_sendmsg compat_sys_sendmsg 342 common recvmsg sys_recvmsg compat_sys_recvmsg -343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 344 common accept4 sys_accept4 345 common name_to_handle_at sys_name_to_handle_at 346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -347 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +347 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 348 common syncfs sys_syncfs 349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 350 common setns sys_setns diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 620e222003ca..285201cf1f83 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -20,7 +20,7 @@ 10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve 12 common chdir sys_chdir sys_chdir -13 32 time - compat_sys_time +13 32 time - sys_time32 14 common mknod sys_mknod sys_mknod 15 common chmod sys_chmod sys_chmod 16 32 lchown - sys_lchown16 @@ -30,11 +30,11 @@ 22 common umount sys_oldumount sys_oldumount 23 32 setuid - sys_setuid16 24 32 getuid - sys_getuid16 -25 32 stime - compat_sys_stime +25 32 stime - sys_stime32 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync @@ -112,7 +112,7 @@ 120 common clone sys_clone sys_clone 121 common setdomainname sys_setdomainname sys_setdomainname 122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex compat_sys_adjtimex +124 common adjtimex sys_adjtimex sys_adjtimex_time32 125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - @@ -150,8 +150,8 @@ 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162 common nanosleep sys_nanosleep compat_sys_nanosleep +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 163 common mremap sys_mremap sys_mremap 164 32 setresuid - sys_setresuid16 165 32 getresuid - sys_getresuid16 @@ -165,7 +165,7 @@ 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 @@ -246,13 +246,13 @@ 235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill -238 common futex sys_futex compat_sys_futex +238 common futex sys_futex sys_futex_time32 239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup 244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents compat_sys_io_getevents +245 common io_getevents sys_io_getevents sys_io_getevents_time32 246 common io_submit sys_io_submit compat_sys_io_submit 247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group @@ -262,14 +262,14 @@ 252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime compat_sys_timer_settime -256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun 258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime compat_sys_clock_settime -260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -261 common clock_getres sys_clock_getres compat_sys_clock_getres -262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 @@ -279,8 +279,8 @@ 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load @@ -298,7 +298,7 @@ 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat 291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat compat_sys_futimesat +292 common futimesat sys_futimesat sys_futimesat_time32 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - 294 common unlinkat sys_unlinkat sys_unlinkat @@ -308,8 +308,8 @@ 298 common readlinkat sys_readlinkat sys_readlinkat 299 common fchmodat sys_fchmodat sys_fchmodat 300 common faccessat sys_faccessat sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6 -302 common ppoll sys_ppoll compat_sys_ppoll +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list @@ -320,15 +320,15 @@ 310 common move_pages sys_move_pages compat_sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes compat_sys_utimes +313 common utimes sys_utimes sys_utimes_time32 314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat compat_sys_utimensat +315 common utimensat sys_utimensat sys_utimensat_time32 316 common signalfd sys_signalfd compat_sys_signalfd 317 common timerfd - - 318 common eventfd sys_eventfd sys_eventfd 319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 @@ -344,7 +344,7 @@ 334 common prlimit64 sys_prlimit64 sys_prlimit64 335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 338 common syncfs sys_syncfs sys_syncfs 339 common setns sys_setns sys_setns 340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv @@ -364,7 +364,7 @@ 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket 360 common socketpair sys_socketpair sys_socketpair diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index e63cd013cc77..7cb05b50aeaa 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -44,7 +44,7 @@ 28 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 29 32 pause sys_pause 29 64 pause sys_nis_syscall -30 common utime sys_utime compat_sys_utime +30 common utime sys_utime sys_utime32 31 32 lchown32 sys_lchown 32 32 fchown32 sys_fchown 33 common access sys_access @@ -128,7 +128,7 @@ 102 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 103 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 104 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -105 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +105 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 106 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 107 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 108 32 setresuid32 sys_setresuid @@ -168,11 +168,11 @@ 135 common socketpair sys_socketpair 136 common mkdir sys_mkdir 137 common rmdir sys_rmdir -138 common utimes sys_utimes compat_sys_utimes +138 common utimes sys_utimes sys_utimes_time32 139 common stat64 sys_stat64 compat_sys_stat64 140 common sendfile64 sys_sendfile64 141 common getpeername sys_getpeername -142 common futex sys_futex compat_sys_futex +142 common futex sys_futex sys_futex_time32 143 common gettid sys_gettid 144 common getrlimit sys_getrlimit compat_sys_getrlimit 145 common setrlimit sys_setrlimit compat_sys_setrlimit @@ -258,7 +258,7 @@ 216 64 sigreturn sys_nis_syscall 217 common clone sys_clone 218 common ioprio_get sys_ioprio_get -219 32 adjtimex sys_adjtimex compat_sys_adjtimex +219 32 adjtimex sys_adjtimex sys_adjtimex_time32 219 64 adjtimex sys_sparc_adjtimex 220 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask 220 64 sigprocmask sys_nis_syscall @@ -272,9 +272,9 @@ 228 common setfsuid sys_setfsuid16 229 common setfsgid sys_setfsgid16 230 common _newselect sys_select compat_sys_select -231 32 time sys_time compat_sys_time +231 32 time sys_time sys_time32 232 common splice sys_splice -233 common stime sys_stime compat_sys_stime +233 common stime sys_stime sys_stime32 234 common statfs64 sys_statfs64 compat_sys_statfs64 235 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 236 common _llseek sys_llseek @@ -289,8 +289,8 @@ 245 common sched_yield sys_sched_yield 246 common sched_get_priority_max sys_sched_get_priority_max 247 common sched_get_priority_min sys_sched_get_priority_min -248 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -249 common nanosleep sys_nanosleep compat_sys_nanosleep +248 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +249 common nanosleep sys_nanosleep sys_nanosleep_time32 250 32 mremap sys_mremap 250 64 mremap sys_64_mremap 251 common _sysctl sys_sysctl compat_sys_sysctl @@ -299,14 +299,14 @@ 254 32 nfsservctl sys_ni_syscall sys_nis_syscall 254 64 nfsservctl sys_nis_syscall 255 common sync_file_range sys_sync_file_range compat_sys_sync_file_range -256 common clock_settime sys_clock_settime compat_sys_clock_settime -257 common clock_gettime sys_clock_gettime compat_sys_clock_gettime -258 common clock_getres sys_clock_getres compat_sys_clock_getres -259 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +256 common clock_settime sys_clock_settime sys_clock_settime32 +257 common clock_gettime sys_clock_gettime sys_clock_gettime32 +258 common clock_getres sys_clock_getres sys_clock_getres_time32 +259 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 260 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 261 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -262 common timer_settime sys_timer_settime compat_sys_timer_settime -263 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +262 common timer_settime sys_timer_settime sys_timer_settime32 +263 common timer_gettime sys_timer_gettime sys_timer_gettime32 264 common timer_getoverrun sys_timer_getoverrun 265 common timer_delete sys_timer_delete 266 common timer_create sys_timer_create compat_sys_timer_create @@ -316,11 +316,11 @@ 269 common io_destroy sys_io_destroy 270 common io_submit sys_io_submit compat_sys_io_submit 271 common io_cancel sys_io_cancel -272 common io_getevents sys_io_getevents compat_sys_io_getevents +272 common io_getevents sys_io_getevents sys_io_getevents_time32 273 common mq_open sys_mq_open compat_sys_mq_open 274 common mq_unlink sys_mq_unlink -275 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend -276 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +275 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +276 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 277 common mq_notify sys_mq_notify compat_sys_mq_notify 278 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 279 common waitid sys_waitid compat_sys_waitid @@ -332,7 +332,7 @@ 285 common mkdirat sys_mkdirat 286 common mknodat sys_mknodat 287 common fchownat sys_fchownat -288 common futimesat sys_futimesat compat_sys_futimesat +288 common futimesat sys_futimesat sys_futimesat_time32 289 common fstatat64 sys_fstatat64 compat_sys_fstatat64 290 common unlinkat sys_unlinkat 291 common renameat sys_renameat @@ -341,8 +341,8 @@ 294 common readlinkat sys_readlinkat 295 common fchmodat sys_fchmodat 296 common faccessat sys_faccessat -297 common pselect6 sys_pselect6 compat_sys_pselect6 -298 common ppoll sys_ppoll compat_sys_ppoll +297 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +298 common ppoll sys_ppoll compat_sys_ppoll_time32 299 common unshare sys_unshare 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 301 common get_robust_list sys_get_robust_list compat_sys_get_robust_list @@ -354,13 +354,13 @@ 307 common move_pages sys_move_pages compat_sys_move_pages 308 common getcpu sys_getcpu 309 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -310 common utimensat sys_utimensat compat_sys_utimensat +310 common utimensat sys_utimensat sys_utimensat_time32 311 common signalfd sys_signalfd compat_sys_signalfd 312 common timerfd_create sys_timerfd_create 313 common eventfd sys_eventfd 314 common fallocate sys_fallocate compat_sys_fallocate -315 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime -316 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +315 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +316 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 317 common signalfd4 sys_signalfd4 compat_sys_signalfd4 318 common eventfd2 sys_eventfd2 319 common epoll_create1 sys_epoll_create1 @@ -372,13 +372,13 @@ 325 common pwritev sys_pwritev compat_sys_pwritev 326 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo 327 common perf_event_open sys_perf_event_open -328 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +328 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 329 common fanotify_init sys_fanotify_init 330 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 331 common prlimit64 sys_prlimit64 332 common name_to_handle_at sys_name_to_handle_at 333 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -334 32 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +334 32 clock_adjtime sys_clock_adjtime sys_clock_adjtime32 334 64 clock_adjtime sys_sparc_clock_adjtime 335 common syncfs sys_syncfs 336 common sendmmsg sys_sendmmsg compat_sys_sendmmsg diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2be1d0eb7754..7705d5ecad25 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -24,7 +24,7 @@ 10 i386 unlink sys_unlink __ia32_sys_unlink 11 i386 execve sys_execve __ia32_compat_sys_execve 12 i386 chdir sys_chdir __ia32_sys_chdir -13 i386 time sys_time __ia32_compat_sys_time +13 i386 time sys_time __ia32_sys_time32 14 i386 mknod sys_mknod __ia32_sys_mknod 15 i386 chmod sys_chmod __ia32_sys_chmod 16 i386 lchown sys_lchown16 __ia32_sys_lchown16 @@ -36,12 +36,12 @@ 22 i386 umount sys_oldumount __ia32_sys_oldumount 23 i386 setuid sys_setuid16 __ia32_sys_setuid16 24 i386 getuid sys_getuid16 __ia32_sys_getuid16 -25 i386 stime sys_stime __ia32_compat_sys_stime +25 i386 stime sys_stime __ia32_sys_stime32 26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace 27 i386 alarm sys_alarm __ia32_sys_alarm 28 i386 oldfstat sys_fstat __ia32_sys_fstat 29 i386 pause sys_pause __ia32_sys_pause -30 i386 utime sys_utime __ia32_compat_sys_utime +30 i386 utime sys_utime __ia32_sys_utime32 31 i386 stty 32 i386 gtty 33 i386 access sys_access __ia32_sys_access @@ -135,7 +135,7 @@ 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname 123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt -124 i386 adjtimex sys_adjtimex __ia32_compat_sys_adjtimex +124 i386 adjtimex sys_adjtimex __ia32_sys_adjtimex_time32 125 i386 mprotect sys_mprotect __ia32_sys_mprotect 126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask 127 i386 create_module @@ -172,8 +172,8 @@ 158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield 159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max 160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min -161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_compat_sys_sched_rr_get_interval -162 i386 nanosleep sys_nanosleep __ia32_compat_sys_nanosleep +161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval_time32 +162 i386 nanosleep sys_nanosleep __ia32_sys_nanosleep_time32 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 @@ -188,7 +188,7 @@ 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending -177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait +177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time32 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread @@ -251,14 +251,14 @@ 237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr 238 i386 tkill sys_tkill __ia32_sys_tkill 239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64 -240 i386 futex sys_futex __ia32_compat_sys_futex +240 i386 futex sys_futex __ia32_sys_futex_time32 241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity 242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity 243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area 244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area 245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup 246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy -247 i386 io_getevents sys_io_getevents __ia32_compat_sys_io_getevents +247 i386 io_getevents sys_io_getevents __ia32_sys_io_getevents_time32 248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit 249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel 250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64 @@ -271,18 +271,18 @@ 257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages 258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address 259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create -260 i386 timer_settime sys_timer_settime __ia32_compat_sys_timer_settime -261 i386 timer_gettime sys_timer_gettime __ia32_compat_sys_timer_gettime +260 i386 timer_settime sys_timer_settime __ia32_sys_timer_settime32 +261 i386 timer_gettime sys_timer_gettime __ia32_sys_timer_gettime32 262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun 263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete -264 i386 clock_settime sys_clock_settime __ia32_compat_sys_clock_settime -265 i386 clock_gettime sys_clock_gettime __ia32_compat_sys_clock_gettime -266 i386 clock_getres sys_clock_getres __ia32_compat_sys_clock_getres -267 i386 clock_nanosleep sys_clock_nanosleep __ia32_compat_sys_clock_nanosleep +264 i386 clock_settime sys_clock_settime __ia32_sys_clock_settime32 +265 i386 clock_gettime sys_clock_gettime __ia32_sys_clock_gettime32 +266 i386 clock_getres sys_clock_getres __ia32_sys_clock_getres_time32 +267 i386 clock_nanosleep sys_clock_nanosleep __ia32_sys_clock_nanosleep_time32 268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64 269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64 270 i386 tgkill sys_tgkill __ia32_sys_tgkill -271 i386 utimes sys_utimes __ia32_compat_sys_utimes +271 i386 utimes sys_utimes __ia32_sys_utimes_time32 272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind __ia32_sys_mbind @@ -290,8 +290,8 @@ 276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy 277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open 278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink -279 i386 mq_timedsend sys_mq_timedsend __ia32_compat_sys_mq_timedsend -280 i386 mq_timedreceive sys_mq_timedreceive __ia32_compat_sys_mq_timedreceive +279 i386 mq_timedsend sys_mq_timedsend __ia32_sys_mq_timedsend_time32 +280 i386 mq_timedreceive sys_mq_timedreceive __ia32_sys_mq_timedreceive_time32 281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify 282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr 283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load @@ -310,7 +310,7 @@ 296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat 297 i386 mknodat sys_mknodat __ia32_sys_mknodat 298 i386 fchownat sys_fchownat __ia32_sys_fchownat -299 i386 futimesat sys_futimesat __ia32_compat_sys_futimesat +299 i386 futimesat sys_futimesat __ia32_sys_futimesat_time32 300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat 301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat 302 i386 renameat sys_renameat __ia32_sys_renameat @@ -319,8 +319,8 @@ 305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat 306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat 307 i386 faccessat sys_faccessat __ia32_sys_faccessat -308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6 -309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll +308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6_time32 +309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll_time32 310 i386 unshare sys_unshare __ia32_sys_unshare 311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list 312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list @@ -331,13 +331,13 @@ 317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages 318 i386 getcpu sys_getcpu __ia32_sys_getcpu 319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait -320 i386 utimensat sys_utimensat __ia32_compat_sys_utimensat +320 i386 utimensat sys_utimensat __ia32_sys_utimensat_time32 321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd 322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create 323 i386 eventfd sys_eventfd __ia32_sys_eventfd 324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate -325 i386 timerfd_settime sys_timerfd_settime __ia32_compat_sys_timerfd_settime -326 i386 timerfd_gettime sys_timerfd_gettime __ia32_compat_sys_timerfd_gettime +325 i386 timerfd_settime sys_timerfd_settime __ia32_sys_timerfd_settime32 +326 i386 timerfd_gettime sys_timerfd_gettime __ia32_sys_timerfd_gettime32 327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4 328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2 329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1 @@ -348,13 +348,13 @@ 334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev 335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo 336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open -337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg +337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg_time32 338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init 339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark 340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64 341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at 342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at -343 i386 clock_adjtime sys_clock_adjtime __ia32_compat_sys_clock_adjtime +343 i386 clock_adjtime sys_clock_adjtime __ia32_sys_clock_adjtime32 344 i386 syncfs sys_syncfs __ia32_sys_syncfs 345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg 346 i386 setns sys_setns __ia32_sys_setns diff --git a/fs/aio.c b/fs/aio.c index b906ff70c90f..4394d3fe116a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2198,11 +2198,11 @@ SYSCALL_DEFINE6(io_pgetevents_time32, #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, - compat_long_t, min_nr, - compat_long_t, nr, - struct io_event __user *, events, - struct old_timespec32 __user *, timeout) +SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, + __s32, min_nr, + __s32, nr, + struct io_event __user *, events, + struct old_timespec32 __user *, timeout) { struct timespec64 t; int ret; diff --git a/fs/select.c b/fs/select.c index d0f35dbc0e8f..6cbc9ff56ba0 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1379,7 +1379,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, +COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { @@ -1402,7 +1402,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, #endif #if defined(CONFIG_COMPAT_32BIT_TIME) -COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, +COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { diff --git a/fs/timerfd.c b/fs/timerfd.c index 803ca070d42e..6a6fc8aa1de7 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -560,7 +560,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, +SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags, const struct old_itimerspec32 __user *, utmr, struct old_itimerspec32 __user *, otmr) { @@ -577,7 +577,7 @@ COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return ret; } -COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd, +SYSCALL_DEFINE2(timerfd_gettime32, int, ufd, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 kotmr; diff --git a/fs/utimes.c b/fs/utimes.c index bdcf2daf39c1..350c9c16ace1 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -224,8 +224,8 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times) * of sys_utimes. */ #ifdef __ARCH_WANT_SYS_UTIME32 -COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, - struct old_utimbuf32 __user *, t) +SYSCALL_DEFINE2(utime32, const char __user *, filename, + struct old_utimbuf32 __user *, t) { struct timespec64 tv[2]; @@ -240,7 +240,7 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, } #endif -COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags) +SYSCALL_DEFINE4(utimensat_time32, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags) { struct timespec64 tv[2]; @@ -276,14 +276,14 @@ static long do_compat_futimesat(unsigned int dfd, const char __user *filename, return do_utimes(dfd, filename, t ? tv : NULL, 0); } -COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, +SYSCALL_DEFINE3(futimesat_time32, unsigned int, dfd, const char __user *, filename, struct old_timeval32 __user *, t) { return do_compat_futimesat(dfd, filename, t); } -COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval32 __user *, t) +SYSCALL_DEFINE2(utimes_time32, const char __user *, filename, struct old_timeval32 __user *, t) { return do_compat_futimesat(AT_FDCWD, filename, t); } diff --git a/include/linux/compat.h b/include/linux/compat.h index 657ca6abd855..ebddcb6cfcf8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -520,11 +520,6 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); -asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, - compat_long_t min_nr, - compat_long_t nr, - struct io_event __user *events, - struct old_timespec32 __user *timeout); asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, compat_long_t min_nr, compat_long_t nr, @@ -617,7 +612,7 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, compat_size_t count); /* fs/select.c */ -asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, +asmlinkage long compat_sys_pselect6_time32(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timespec32 __user *tsp, @@ -627,7 +622,7 @@ asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp, compat_ulong_t __user *exp, struct __kernel_timespec __user *tsp, void __user *sig); -asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, +asmlinkage long compat_sys_ppoll_time32(struct pollfd __user *ufds, unsigned int nfds, struct old_timespec32 __user *tsp, const compat_sigset_t __user *sigmask, @@ -657,19 +652,6 @@ asmlinkage long compat_sys_newfstat(unsigned int fd, /* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */ -/* fs/timerfd.c */ -asmlinkage long compat_sys_timerfd_gettime(int ufd, - struct old_itimerspec32 __user *otmr); -asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, - const struct old_itimerspec32 __user *utmr, - struct old_itimerspec32 __user *otmr); - -/* fs/utimes.c */ -asmlinkage long compat_sys_utimensat(unsigned int dfd, - const char __user *filename, - struct old_timespec32 __user *t, - int flags); - /* kernel/exit.c */ asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, @@ -678,9 +660,6 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t, /* kernel/futex.c */ -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, - struct old_timespec32 __user *utime, u32 __user *uaddr2, - u32 val3); asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); @@ -688,10 +667,6 @@ asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, compat_size_t __user *len_ptr); -/* kernel/hrtimer.c */ -asmlinkage long compat_sys_nanosleep(struct old_timespec32 __user *rqtp, - struct old_timespec32 __user *rmtp); - /* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, struct compat_itimerval __user *it); @@ -709,20 +684,6 @@ asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, asmlinkage long compat_sys_timer_create(clockid_t which_clock, struct compat_sigevent __user *timer_event_spec, timer_t __user *created_timer_id); -asmlinkage long compat_sys_timer_gettime(timer_t timer_id, - struct old_itimerspec32 __user *setting); -asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, - struct old_itimerspec32 __user *new, - struct old_itimerspec32 __user *old); -asmlinkage long compat_sys_clock_settime(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_getres(clockid_t which_clock, - struct old_timespec32 __user *tp); -asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, - struct old_timespec32 __user *rqtp, - struct old_timespec32 __user *rmtp); /* kernel/ptrace.c */ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, @@ -735,8 +696,6 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, compat_ulong_t __user *user_mask_ptr); -asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct old_timespec32 __user *interval); /* kernel/signal.c */ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, @@ -754,7 +713,7 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, +asmlinkage long compat_sys_rt_sigtimedwait_time32(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct old_timespec32 __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese, @@ -777,7 +736,6 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv, struct timezone __user *tz); -asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp); /* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); @@ -786,14 +744,6 @@ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); asmlinkage long compat_sys_mq_open(const char __user *u_name, int oflag, compat_mode_t mode, struct compat_mq_attr __user *u_attr); -asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, - const char __user *u_msg_ptr, - compat_size_t msg_len, unsigned int msg_prio, - const struct old_timespec32 __user *u_abs_timeout); -asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, - char __user *u_msg_ptr, - compat_size_t msg_len, unsigned int __user *u_msg_prio, - const struct old_timespec32 __user *u_abs_timeout); asmlinkage long compat_sys_mq_notify(mqd_t mqdes, const struct compat_sigevent __user *u_notification); asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, @@ -809,8 +759,6 @@ asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, /* ipc/sem.c */ asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); -asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsems, const struct old_timespec32 __user *timeout); /* ipc/shm.c */ asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr); @@ -868,7 +816,7 @@ asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct __kernel_timespec __user *timeout); -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, +asmlinkage long compat_sys_recvmmsg_time32(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct old_timespec32 __user *timeout); asmlinkage long compat_sys_wait4(compat_pid_t pid, @@ -879,8 +827,6 @@ asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); -asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct old_timex32 __user *tp); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, @@ -921,8 +867,6 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, /* __ARCH_WANT_SYSCALL_NO_AT */ asmlinkage long compat_sys_open(const char __user *filename, int flags, umode_t mode); -asmlinkage long compat_sys_utimes(const char __user *filename, - struct old_timeval32 __user *t); /* __ARCH_WANT_SYSCALL_NO_FLAGS */ asmlinkage long compat_sys_signalfd(int ufd, @@ -936,12 +880,6 @@ asmlinkage long compat_sys_newlstat(const char __user *filename, struct compat_stat __user *statbuf); /* __ARCH_WANT_SYSCALL_DEPRECATED */ -asmlinkage long compat_sys_time(old_time32_t __user *tloc); -asmlinkage long compat_sys_utime(const char __user *filename, - struct old_utimbuf32 __user *t); -asmlinkage long compat_sys_futimesat(unsigned int dfd, - const char __user *filename, - struct old_timeval32 __user *t); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timeval32 __user *tvp); @@ -976,9 +914,6 @@ asmlinkage long compat_sys_sigaction(int sig, struct compat_old_sigaction __user *oact); #endif -/* obsolete: kernel/time/time.c */ -asmlinkage long compat_sys_stime(old_time32_t __user *tptr); - /* obsolete: net/socket.c */ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 09330d5bda0c..94369f5bd8e5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -297,6 +297,11 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout); +asmlinkage long sys_io_getevents_time32(__u32 ctx_id, + __s32 min_nr, + __s32 nr, + struct io_event __user *events, + struct old_timespec32 __user *timeout); asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long min_nr, long nr, @@ -522,11 +527,19 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags, const struct __kernel_itimerspec __user *utmr, struct __kernel_itimerspec __user *otmr); asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime32(int ufd, + struct old_itimerspec32 __user *otmr); +asmlinkage long sys_timerfd_settime32(int ufd, int flags, + const struct old_itimerspec32 __user *utmr, + struct old_itimerspec32 __user *otmr); /* fs/utimes.c */ asmlinkage long sys_utimensat(int dfd, const char __user *filename, struct __kernel_timespec __user *utimes, int flags); +asmlinkage long sys_utimensat_time32(unsigned int dfd, + const char __user *filename, + struct old_timespec32 __user *t, int flags); /* kernel/acct.c */ asmlinkage long sys_acct(const char __user *name); @@ -555,6 +568,9 @@ asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); +asmlinkage long sys_futex_time32(u32 __user *uaddr, int op, u32 val, + struct old_timespec32 __user *utime, u32 __user *uaddr2, + u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, size_t __user *len_ptr); @@ -564,6 +580,8 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, /* kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); +asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, + struct old_timespec32 __user *rmtp); /* kernel/itimer.c */ asmlinkage long sys_getitimer(int which, struct itimerval __user *value); @@ -602,6 +620,20 @@ asmlinkage long sys_clock_getres(clockid_t which_clock, asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, const struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); +asmlinkage long sys_timer_gettime32(timer_t timer_id, + struct old_itimerspec32 __user *setting); +asmlinkage long sys_timer_settime32(timer_t timer_id, int flags, + struct old_itimerspec32 __user *new, + struct old_itimerspec32 __user *old); +asmlinkage long sys_clock_settime32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_gettime32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_getres_time32(clockid_t which_clock, + struct old_timespec32 __user *tp); +asmlinkage long sys_clock_nanosleep_time32(clockid_t which_clock, int flags, + struct old_timespec32 __user *rqtp, + struct old_timespec32 __user *rmtp); /* kernel/printk.c */ asmlinkage long sys_syslog(int type, char __user *buf, int len); @@ -627,6 +659,8 @@ asmlinkage long sys_sched_get_priority_max(int policy); asmlinkage long sys_sched_get_priority_min(int policy); asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct __kernel_timespec __user *interval); +asmlinkage long sys_sched_rr_get_interval_time32(pid_t pid, + struct old_timespec32 __user *interval); /* kernel/signal.c */ asmlinkage long sys_restart_syscall(void); @@ -696,6 +730,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); +asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); /* kernel/timer.c */ asmlinkage long sys_getpid(void); @@ -714,6 +749,14 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct __kernel_timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +asmlinkage long sys_mq_timedreceive_time32(mqd_t mqdes, + char __user *u_msg_ptr, + unsigned int msg_len, unsigned int __user *u_msg_prio, + const struct old_timespec32 __user *u_abs_timeout); +asmlinkage long sys_mq_timedsend_time32(mqd_t mqdes, + const char __user *u_msg_ptr, + unsigned int msg_len, unsigned int msg_prio, + const struct old_timespec32 __user *u_abs_timeout); /* ipc/msg.c */ asmlinkage long sys_msgget(key_t key, int msgflg); @@ -731,6 +774,9 @@ asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct __kernel_timespec __user *timeout); +asmlinkage long sys_semtimedop_time32(int semid, struct sembuf __user *sops, + unsigned nsops, + const struct old_timespec32 __user *timeout); asmlinkage long sys_semop(int semid, struct sembuf __user *sops, unsigned nsops); @@ -871,6 +917,8 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, int flags); asmlinkage long sys_clock_adjtime(clockid_t which_clock, struct __kernel_timex __user *tx); +asmlinkage long sys_clock_adjtime32(clockid_t which_clock, + struct old_timex32 __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, @@ -1006,6 +1054,7 @@ asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpgrp(void); asmlinkage long sys_pause(void); asmlinkage long sys_time(time_t __user *tloc); +asmlinkage long sys_time32(old_time32_t __user *tloc); #ifdef __ARCH_WANT_SYS_UTIME asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); @@ -1014,6 +1063,13 @@ asmlinkage long sys_utimes(char __user *filename, asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct timeval __user *utimes); #endif +asmlinkage long sys_futimesat_time32(unsigned int dfd, + const char __user *filename, + struct old_timeval32 __user *t); +asmlinkage long sys_utime32(const char __user *filename, + struct old_utimbuf32 __user *t); +asmlinkage long sys_utimes_time32(const char __user *filename, + struct old_timeval32 __user *t); asmlinkage long sys_creat(const char __user *pathname, umode_t mode); asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user *dirent, @@ -1038,6 +1094,7 @@ asmlinkage long sys_fork(void); /* obsolete: kernel/time/time.c */ asmlinkage long sys_stime(time_t __user *tptr); +asmlinkage long sys_stime32(old_time32_t __user *tptr); /* obsolete: kernel/signal.c */ asmlinkage long sys_sigpending(old_sigset_t __user *uset); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 509484dbfd5d..153b55b94234 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -39,7 +39,7 @@ __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_io_getevents 4 -__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) +__SC_COMP(__NR_io_getevents, sys_io_getevents, sys_io_getevents_time32) /* fs/xattr.c */ #define __NR_setxattr 5 @@ -223,9 +223,9 @@ __SYSCALL(__NR3264_sendfile, sys_sendfile64) /* fs/select.c */ #define __NR_pselect6 72 -__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) +__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 73 -__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) +__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll_time32) /* fs/signalfd.c */ #define __NR_signalfd4 74 @@ -271,14 +271,14 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ __SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_timerfd_settime 86 __SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ - compat_sys_timerfd_settime) + sys_timerfd_settime32) #define __NR_timerfd_gettime 87 __SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ - compat_sys_timerfd_gettime) + sys_timerfd_gettime32) /* fs/utimes.c */ #define __NR_utimensat 88 -__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat) +__SC_COMP(__NR_utimensat, sys_utimensat, sys_utimensat_time32) /* kernel/acct.c */ #define __NR_acct 89 @@ -310,7 +310,7 @@ __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ #define __NR_futex 98 -__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) +__SC_COMP(__NR_futex, sys_futex, sys_futex_time32) #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -320,7 +320,7 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ /* kernel/hrtimer.c */ #define __NR_nanosleep 101 -__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) +__SC_COMP(__NR_nanosleep, sys_nanosleep, sys_nanosleep_time32) /* kernel/itimer.c */ #define __NR_getitimer 102 @@ -342,22 +342,22 @@ __SYSCALL(__NR_delete_module, sys_delete_module) #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) #define __NR_timer_gettime 108 -__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) +__SC_COMP(__NR_timer_gettime, sys_timer_gettime, sys_timer_gettime32) #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_settime 110 -__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) +__SC_COMP(__NR_timer_settime, sys_timer_settime, sys_timer_settime32) #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 112 -__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime) +__SC_COMP(__NR_clock_settime, sys_clock_settime, sys_clock_settime32) #define __NR_clock_gettime 113 -__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) +__SC_COMP(__NR_clock_gettime, sys_clock_gettime, sys_clock_gettime32) #define __NR_clock_getres 114 -__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) +__SC_COMP(__NR_clock_getres, sys_clock_getres, sys_clock_getres_time32) #define __NR_clock_nanosleep 115 __SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ - compat_sys_clock_nanosleep) + sys_clock_nanosleep_time32) /* kernel/printk.c */ #define __NR_syslog 116 @@ -390,7 +390,7 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 127 __SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ - compat_sys_sched_rr_get_interval) + sys_sched_rr_get_interval_time32) /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -413,7 +413,7 @@ __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) #define __NR_rt_sigtimedwait 137 __SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ - compat_sys_rt_sigtimedwait) + compat_sys_rt_sigtimedwait_time32) #define __NR_rt_sigqueueinfo 138 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) @@ -486,7 +486,7 @@ __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) +__SC_COMP(__NR_adjtimex, sys_adjtimex, sys_adjtimex_time32) /* kernel/timer.c */ #define __NR_getpid 172 @@ -512,10 +512,10 @@ __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 182 -__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) +__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, sys_mq_timedsend_time32) #define __NR_mq_timedreceive 183 __SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ - compat_sys_mq_timedreceive) + sys_mq_timedreceive_time32) #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 @@ -537,7 +537,7 @@ __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -659,7 +659,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_recvmmsg 243 -__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) +__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg_time32) /* * Architectures may provide up to 16 syscalls of their own @@ -681,7 +681,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) __SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ compat_sys_open_by_handle_at) #define __NR_clock_adjtime 266 -__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) +__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, sys_clock_adjtime32) #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c595bed7bfcb..c839bf83231d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1471,10 +1471,10 @@ static int compat_prepare_timeout(const struct old_timespec32 __user *p, return 0; } -COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, - const char __user *, u_msg_ptr, - compat_size_t, msg_len, unsigned int, msg_prio, - const struct old_timespec32 __user *, u_abs_timeout) +SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes, + const char __user *, u_msg_ptr, + unsigned int, msg_len, unsigned int, msg_prio, + const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { @@ -1486,10 +1486,10 @@ COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p); } -COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, - char __user *, u_msg_ptr, - compat_size_t, msg_len, unsigned int __user *, u_msg_prio, - const struct old_timespec32 __user *, u_abs_timeout) +SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes, + char __user *, u_msg_ptr, + unsigned int, msg_len, unsigned int __user *, u_msg_prio, + const struct old_timespec32 __user *, u_abs_timeout) { struct timespec64 ts, *p = NULL; if (u_abs_timeout) { diff --git a/ipc/sem.c b/ipc/sem.c index d1efff3a81bb..80909464acff 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2250,7 +2250,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, return do_semtimedop(semid, tsems, nsops, NULL); } -COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, +SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems, unsigned int, nsops, const struct old_timespec32 __user *, timeout) { diff --git a/kernel/futex.c b/kernel/futex.c index be3bff2315ff..caead6c113d4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3812,7 +3812,7 @@ err_unlock: #endif /* CONFIG_COMPAT */ #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, +SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..62862419cd05 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5252,9 +5252,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, - compat_pid_t, pid, - struct old_timespec32 __user *, interval) +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, + struct old_timespec32 __user *, interval) { struct timespec64 t; int retval = sched_rr_get_interval(pid, &t); diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..af27629918cf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3397,7 +3397,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, +COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct old_timespec32 __user *, uts, compat_size_t, sigsetsize) { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index ce04431a40d1..85e5ccec0955 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -42,9 +42,11 @@ COND_SYSCALL(io_destroy); COND_SYSCALL(io_submit); COND_SYSCALL_COMPAT(io_submit); COND_SYSCALL(io_cancel); +COND_SYSCALL(io_getevents_time32); COND_SYSCALL(io_getevents); +COND_SYSCALL(io_pgetevents_time32); COND_SYSCALL(io_pgetevents); -COND_SYSCALL_COMPAT(io_getevents); +COND_SYSCALL_COMPAT(io_pgetevents_time32); COND_SYSCALL_COMPAT(io_pgetevents); /* fs/xattr.c */ @@ -114,9 +116,9 @@ COND_SYSCALL_COMPAT(signalfd4); /* fs/timerfd.c */ COND_SYSCALL(timerfd_create); COND_SYSCALL(timerfd_settime); -COND_SYSCALL_COMPAT(timerfd_settime); +COND_SYSCALL(timerfd_settime32); COND_SYSCALL(timerfd_gettime); -COND_SYSCALL_COMPAT(timerfd_gettime); +COND_SYSCALL(timerfd_gettime32); /* fs/utimes.c */ @@ -135,7 +137,7 @@ COND_SYSCALL(capset); /* kernel/futex.c */ COND_SYSCALL(futex); -COND_SYSCALL_COMPAT(futex); +COND_SYSCALL(futex_time32); COND_SYSCALL(set_robust_list); COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); @@ -187,9 +189,9 @@ COND_SYSCALL(mq_open); COND_SYSCALL_COMPAT(mq_open); COND_SYSCALL(mq_unlink); COND_SYSCALL(mq_timedsend); -COND_SYSCALL_COMPAT(mq_timedsend); +COND_SYSCALL(mq_timedsend_time32); COND_SYSCALL(mq_timedreceive); -COND_SYSCALL_COMPAT(mq_timedreceive); +COND_SYSCALL(mq_timedreceive_time32); COND_SYSCALL(mq_notify); COND_SYSCALL_COMPAT(mq_notify); COND_SYSCALL(mq_getsetattr); @@ -211,7 +213,7 @@ COND_SYSCALL(old_semctl); COND_SYSCALL(semctl); COND_SYSCALL_COMPAT(semctl); COND_SYSCALL(semtimedop); -COND_SYSCALL_COMPAT(semtimedop); +COND_SYSCALL(semtimedop_time32); COND_SYSCALL(semop); /* ipc/shm.c */ @@ -288,7 +290,7 @@ COND_SYSCALL(perf_event_open); COND_SYSCALL(accept4); COND_SYSCALL(recvmmsg); COND_SYSCALL(recvmmsg_time32); -COND_SYSCALL_COMPAT(recvmmsg); +COND_SYSCALL_COMPAT(recvmmsg_time32); COND_SYSCALL_COMPAT(recvmmsg_time64); /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f5cfa1b73d6f..0f5f96075110 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1771,7 +1771,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(nanosleep, struct old_timespec32 __user *, rqtp, +SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { struct timespec64 tu; diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index a51895486e5e..67df65f887ac 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -45,6 +45,7 @@ SYS_NI(timer_delete); SYS_NI(clock_adjtime); SYS_NI(getitimer); SYS_NI(setitimer); +SYS_NI(clock_adjtime32); #ifdef __ARCH_WANT_SYS_ALARM SYS_NI(alarm); #endif @@ -150,16 +151,16 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, #ifdef CONFIG_COMPAT COMPAT_SYS_NI(timer_create); -COMPAT_SYS_NI(clock_adjtime); -COMPAT_SYS_NI(timer_settime); -COMPAT_SYS_NI(timer_gettime); COMPAT_SYS_NI(getitimer); COMPAT_SYS_NI(setitimer); #endif #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYS_NI(timer_settime32); +SYS_NI(timer_gettime32); + +SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock, + struct old_timespec32 __user *, tp) { struct timespec64 new_tp; @@ -171,8 +172,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, return do_sys_settimeofday64(&new_tp, NULL); } -COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { int ret; struct timespec64 kernel_tp; @@ -186,8 +187,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return 0; } -COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { struct timespec64 rtn_tp = { .tv_sec = 0, @@ -206,9 +207,9 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, } } -COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, - struct old_timespec32 __user *, rqtp, - struct old_timespec32 __user *, rmtp) +SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, + struct old_timespec32 __user *, rqtp, + struct old_timespec32 __user *, rmtp) { struct timespec64 t; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index de79f85ae14f..29176635991f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -730,8 +730,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, - struct old_itimerspec32 __user *, setting) +SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, + struct old_itimerspec32 __user *, setting) { struct itimerspec64 cur_setting; @@ -903,9 +903,9 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, - struct old_itimerspec32 __user *, new, - struct old_itimerspec32 __user *, old) +SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, + struct old_itimerspec32 __user *, new, + struct old_itimerspec32 __user *, old) { struct itimerspec64 new_spec, old_spec; struct itimerspec64 *rtn = old ? &old_spec : NULL; @@ -1096,8 +1096,8 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1111,8 +1111,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, return kc->clock_set(which_clock, &ts); } -COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1129,8 +1129,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, return err; } -COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, - struct old_timex32 __user *, utp) +SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, + struct old_timex32 __user *, utp) { struct __kernel_timex ktx; int err; @@ -1147,8 +1147,8 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, return err; } -COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, - struct old_timespec32 __user *, tp) +SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, + struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; @@ -1204,9 +1204,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, - struct old_timespec32 __user *, rqtp, - struct old_timespec32 __user *, rmtp) +SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, + struct old_timespec32 __user *, rqtp, + struct old_timespec32 __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; diff --git a/kernel/time/time.c b/kernel/time/time.c index 78b5c8f1495a..6261f969dcb7 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -98,11 +98,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr) #endif /* __ARCH_WANT_SYS_TIME */ -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* old_time32_t is a 32 bit "long" and needs to get converted. */ -COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc) +SYSCALL_DEFINE1(time32, old_time32_t __user *, tloc) { old_time32_t i; @@ -116,7 +116,7 @@ COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc) return i; } -COMPAT_SYSCALL_DEFINE1(stime, old_time32_t __user *, tptr) +SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr) { struct timespec64 tv; int err; @@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex return 0; } -COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp) +SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp) { struct __kernel_timex txc; int err, ret; diff --git a/net/compat.c b/net/compat.c index 959d1c51826d..2fef7b9db434 100644 --- a/net/compat.c +++ b/net/compat.c @@ -822,7 +822,7 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, } #ifdef CONFIG_COMPAT_32BIT_TIME -COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, +COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct old_timespec32 __user *, timeout) { -- cgit v1.2.3 From c70a772fda11570ebddecbce1543a3fda008db4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Jan 2019 00:00:34 +0100 Subject: y2038: remove struct definition redirects We now use 64-bit time_t on all architectures, so the __kernel_timex, __kernel_timeval and __kernel_timespec redirects can be removed after having served their purpose. This makes it all much less confusing, as the __kernel_* types now always refer to the same layout based on 64-bit time_t across all 32-bit and 64-bit architectures. Signed-off-by: Arnd Bergmann --- include/linux/time64.h | 8 -------- include/linux/timex.h | 7 ------- include/uapi/linux/time.h | 4 ---- include/uapi/linux/timex.h | 2 -- 4 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 05634afba0db..f38d382ffec1 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -7,14 +7,6 @@ typedef __s64 time64_t; typedef __u64 timeu64_t; -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. - */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timespec timespec -#define __kernel_itimerspec itimerspec -#endif - #include struct timespec64 { diff --git a/include/linux/timex.h b/include/linux/timex.h index 4aff9f0d1367..ce0859763670 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,13 +53,6 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path - * and 32-bit emulation. - */ -#ifndef CONFIG_64BIT_TIME -#define __kernel_timex timex -#endif - #include #define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 6b56a2208be7..b03f8717c312 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -42,19 +42,15 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec struct __kernel_timespec { __kernel_time64_t tv_sec; /* seconds */ long long tv_nsec; /* nanoseconds */ }; -#endif -#ifndef __kernel_itimerspec struct __kernel_itimerspec { struct __kernel_timespec it_interval; /* timer period */ struct __kernel_timespec it_value; /* timer expiration */ }; -#endif /* * legacy timeval structure, only embedded in structures that diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h index a1c6b73016a5..9f517f9010bb 100644 --- a/include/uapi/linux/timex.h +++ b/include/uapi/linux/timex.h @@ -97,7 +97,6 @@ struct __kernel_timex_timeval { long long tv_usec; }; -#ifndef __kernel_timex struct __kernel_timex { unsigned int modes; /* mode selector */ int :32; /* pad */ @@ -131,7 +130,6 @@ struct __kernel_timex { int :32; int :32; int :32; int :32; int :32; int :32; int :32; }; -#endif /* * Mode codes (timex.mode) -- cgit v1.2.3 From a4f342b9607d8c2034d3135cbbb11b4028be3678 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 4 Feb 2019 11:09:48 +0000 Subject: PM / OPP: Introduce a power estimation helper The Energy Model (EM) framework provides an API to let drivers register the active power of CPUs. The drivers are expected to provide a callback method which estimates the power consumed by a CPU at each available performance levels. How exactly this should be implemented, however, depends on the platform. On some systems, PM_OPP knows the voltage and frequency at which CPUs can run. When coupled with the CPU 'capacitance' (as provided by the 'dynamic-power-coefficient' devicetree binding), it is possible to estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with C its capacitance and V and f respectively the voltage and frequency of the OPP. The Intelligent Power Allocator (IPA) thermal governor already implements that estimation method, in the thermal framework. However, this power estimation method can be applied to any platform where all the parameters are known (C, V and f), and not only those suffering thermal issues. As such, the code implementing this feature can be re-used to also populate the EM framework now used by EAS. As a first step, introduce in PM_OPP a helper function which CPUFreq drivers can use to register into the EM framework. This duplicates the power estimation done in IPA until it can be migrated to using the EM framework. This will be done later, once the EM framework has support for at least all platforms currently supported by IPA. Signed-off-by: Quentin Perret Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 +++ 2 files changed, 105 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 06f0f632ec47..cd58959e5158 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "opp.h" @@ -1047,3 +1048,101 @@ struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) return of_node_get(opp->np); } EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); + +/* + * Callback function provided to the Energy Model framework upon registration. + * This computes the power estimated by @CPU at @kHz if it is the frequency + * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise + * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled + * frequency and @mW to the associated power. The power is estimated as + * P = C * V^2 * f with C being the CPU's capacitance and V and f respectively + * the voltage and frequency of the OPP. + * + * Returns -ENODEV if the CPU device cannot be found, -EINVAL if the power + * calculation failed because of missing parameters, 0 otherwise. + */ +static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, + int cpu) +{ + struct device *cpu_dev; + struct dev_pm_opp *opp; + struct device_node *np; + unsigned long mV, Hz; + u32 cap; + u64 tmp; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return -ENODEV; + + np = of_node_get(cpu_dev->of_node); + if (!np) + return -EINVAL; + + ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); + of_node_put(np); + if (ret) + return -EINVAL; + + Hz = *kHz * 1000; + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz); + if (IS_ERR(opp)) + return -EINVAL; + + mV = dev_pm_opp_get_voltage(opp) / 1000; + dev_pm_opp_put(opp); + if (!mV) + return -EINVAL; + + tmp = (u64)cap * mV * mV * (Hz / 1000000); + do_div(tmp, 1000000000); + + *mW = (unsigned long)tmp; + *kHz = Hz / 1000; + + return 0; +} + +/** + * dev_pm_opp_of_register_em() - Attempt to register an Energy Model + * @cpus : CPUs for which an Energy Model has to be registered + * + * This checks whether the "dynamic-power-coefficient" devicetree property has + * been specified, and tries to register an Energy Model with it if it has. + */ +void dev_pm_opp_of_register_em(struct cpumask *cpus) +{ + struct em_data_callback em_cb = EM_DATA_CB(_get_cpu_power); + int ret, nr_opp, cpu = cpumask_first(cpus); + struct device *cpu_dev; + struct device_node *np; + u32 cap; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return; + + nr_opp = dev_pm_opp_get_opp_count(cpu_dev); + if (nr_opp <= 0) + return; + + np = of_node_get(cpu_dev->of_node); + if (!np) + return; + + /* + * Register an EM only if the 'dynamic-power-coefficient' property is + * set in devicetree. It is assumed the voltage values are known if that + * property is set since it is useless otherwise. If voltages are not + * known, just let the EM registration fail with an error to alert the + * user about the inconsistent configuration. + */ + ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); + of_node_put(np); + if (ret || !cap) + return; + + em_register_perf_domain(cpus, nr_opp, &em_cb); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0a2a88e5a383..1470c57933cf 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -322,6 +322,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); +void dev_pm_opp_of_register_em(struct cpumask *cpus); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -360,6 +361,11 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) { return NULL; } + +static inline void dev_pm_opp_of_register_em(struct cpumask *cpus) +{ +} + static inline int of_get_required_opp_performance_state(struct device_node *np, int index) { return -ENOTSUPP; -- cgit v1.2.3 From 752b5da2359fee342d5264e2c10352daf5b9a199 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 21 Jan 2019 16:45:46 +0100 Subject: phy: dphy: Remove unused header The videomode.h header inclusion is an artifact from the patches development, remove it. Suggested-by: Sakari Ailus Acked-by: Sakari Ailus Signed-off-by: Maxime Ripard Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy-mipi-dphy.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h index c08aacc0ac35..9cf97cd1d303 100644 --- a/include/linux/phy/phy-mipi-dphy.h +++ b/include/linux/phy/phy-mipi-dphy.h @@ -6,8 +6,6 @@ #ifndef __PHY_MIPI_DPHY_H_ #define __PHY_MIPI_DPHY_H_ -#include