From 0bf5f9492389aa8df5c8e38fcb4488802d24504d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Jul 2019 16:26:27 -0700 Subject: mm: fix the MAP_UNINITIALIZED flag We can't expose UAPI symbols differently based on CONFIG_ symbols, as userspace won't have them available. Instead always define the flag, but only respect it based on the config option. Link: http://lkml.kernel.org/r/20190703122359.18200-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index abd238d0f7a4..cb556b430e71 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -19,15 +19,13 @@ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED -# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ -#else -# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ -#endif /* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ +#define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be + * uninitialized */ + /* * Flags for mlock */ -- cgit v1.2.3 From 694a58e29ef27c4c26f103a9decfd053f94dd34c Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 16 Jul 2019 16:28:07 -0700 Subject: uapi linux/coda.h: use __kernel_pid_t for userspace Part of a patch by Mikko Rapeli, as Arnd Bergman commented on the original patch. pid_t might differ between libc and the kernel, so the kernel interface has to use types that the kernel defines. Link: http://lkml.kernel.org/r/f374a71f4d351bc8c8b3ac18ad7765c88d806d10.1558117389.git.jaharkes@cs.cmu.edu Signed-off-by: Mikko Rapeli Signed-off-by: Jan Harkes Cc: Arnd Bergmann Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/coda.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/coda.h b/include/uapi/linux/coda.h index 695fade33c64..ed8cb263e482 100644 --- a/include/uapi/linux/coda.h +++ b/include/uapi/linux/coda.h @@ -295,8 +295,8 @@ struct coda_statfs { struct coda_in_hdr { u_int32_t opcode; u_int32_t unique; /* Keep multiple outstanding msgs distinct */ - pid_t pid; - pid_t pgid; + __kernel_pid_t pid; + __kernel_pid_t pgid; vuid_t uid; }; -- cgit v1.2.3 From f90fb3c7e2c13ae829db2274b88b845a75038b8a Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 16 Jul 2019 16:28:10 -0700 Subject: uapi linux/coda_psdev.h: move upc_req definition from uapi to kernel side headers Only users of upc_req in kernel side fs/coda/psdev.c and fs/coda/upcall.c already include linux/coda_psdev.h. Suggested by Jan Harkes in https://lore.kernel.org/lkml/20150531111913.GA23377@cs.cmu.edu/ Fixes these include/uapi/linux/coda_psdev.h compilation errors in userspace: linux/coda_psdev.h:12:19: error: field `uc_chain' has incomplete type struct list_head uc_chain; ^ linux/coda_psdev.h:13:2: error: unknown type name `caddr_t' caddr_t uc_data; ^ linux/coda_psdev.h:14:2: error: unknown type name `u_short' u_short uc_flags; ^ linux/coda_psdev.h:15:2: error: unknown type name `u_short' u_short uc_inSize; /* Size is at most 5000 bytes */ ^ linux/coda_psdev.h:16:2: error: unknown type name `u_short' u_short uc_outSize; ^ linux/coda_psdev.h:17:2: error: unknown type name `u_short' u_short uc_opcode; /* copied from data to save lookup */ ^ linux/coda_psdev.h:19:2: error: unknown type name `wait_queue_head_t' wait_queue_head_t uc_sleep; /* process' wait queue */ ^ Link: http://lkml.kernel.org/r/9f99f5ce6a0563d5266e6cf7aa9585aac2cae971.1558117389.git.jaharkes@cs.cmu.edu Signed-off-by: Mikko Rapeli Signed-off-by: Jan Harkes Cc: Arnd Bergmann Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 11 +++++++++++ include/uapi/linux/coda_psdev.h | 13 ------------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 15170954aa2b..57d2b2faf6a3 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -19,6 +19,17 @@ struct venus_comm { struct mutex vc_mutex; }; +/* messages between coda filesystem in kernel and Venus */ +struct upc_req { + struct list_head uc_chain; + caddr_t uc_data; + u_short uc_flags; + u_short uc_inSize; /* Size is at most 5000 bytes */ + u_short uc_outSize; + u_short uc_opcode; /* copied from data to save lookup */ + int uc_unique; + wait_queue_head_t uc_sleep; /* process' wait queue */ +}; static inline struct venus_comm *coda_vcp(struct super_block *sb) { diff --git a/include/uapi/linux/coda_psdev.h b/include/uapi/linux/coda_psdev.h index aa6623efd2dd..d50d51a57fe4 100644 --- a/include/uapi/linux/coda_psdev.h +++ b/include/uapi/linux/coda_psdev.h @@ -7,19 +7,6 @@ #define CODA_PSDEV_MAJOR 67 #define MAX_CODADEVS 5 /* how many do we allow */ - -/* messages between coda filesystem in kernel and Venus */ -struct upc_req { - struct list_head uc_chain; - caddr_t uc_data; - u_short uc_flags; - u_short uc_inSize; /* Size is at most 5000 bytes */ - u_short uc_outSize; - u_short uc_opcode; /* copied from data to save lookup */ - int uc_unique; - wait_queue_head_t uc_sleep; /* process' wait queue */ -}; - #define CODA_REQ_ASYNC 0x1 #define CODA_REQ_READ 0x2 #define CODA_REQ_WRITE 0x4 -- cgit v1.2.3 From 2fe7491d219428a32f09948e88bfaf8e71b9a66b Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Tue, 16 Jul 2019 16:28:26 -0700 Subject: uapi linux/coda_psdev.h: move CODA_REQ_ from uapi to kernel side headers These constants only used internally and not exposed to userspace. Link: http://lkml.kernel.org/r/baeafc30dad70d8b422ee679420099c2d8aa7da0.1558117389.git.jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Arnd Bergmann Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Mikko Rapeli Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 5 +++++ include/uapi/linux/coda_psdev.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index d1672fd5e638..9487f792770c 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -31,6 +31,11 @@ struct upc_req { wait_queue_head_t uc_sleep; /* process' wait queue */ }; +#define CODA_REQ_ASYNC 0x1 +#define CODA_REQ_READ 0x2 +#define CODA_REQ_WRITE 0x4 +#define CODA_REQ_ABORT 0x8 + static inline struct venus_comm *coda_vcp(struct super_block *sb) { return (struct venus_comm *)((sb)->s_fs_info); diff --git a/include/uapi/linux/coda_psdev.h b/include/uapi/linux/coda_psdev.h index d50d51a57fe4..3dacb7fad66a 100644 --- a/include/uapi/linux/coda_psdev.h +++ b/include/uapi/linux/coda_psdev.h @@ -7,9 +7,4 @@ #define CODA_PSDEV_MAJOR 67 #define MAX_CODADEVS 5 /* how many do we allow */ -#define CODA_REQ_ASYNC 0x1 -#define CODA_REQ_READ 0x2 -#define CODA_REQ_WRITE 0x4 -#define CODA_REQ_ABORT 0x8 - #endif /* _UAPI__CODA_PSDEV_H */ -- cgit v1.2.3 From 6ced9aa7b56baeb241a715df4539e60d5e3118e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jul 2019 16:28:32 -0700 Subject: coda: stop using 'struct timespec' in user API We exchange file timestamps with user space using psdev device read/write operations with a fixed but architecture specific binary layout. On 32-bit systems, this uses a 'timespec' structure that is defined by the C library to contain two 32-bit values for seconds and nanoseconds. As we get ready for the year 2038 overflow of the 32-bit signed seconds, the kernel now uses 64-bit timestamps internally, and user space will do the same change by changing the 'timespec' definition in the future. Unfortunately, this breaks the layout of the coda_vattr structure, so we need to redefine that in terms of something that does not change. I'm introducing a new 'struct vtimespec' structure here that keeps the existing layout, and the same change has to be done in the coda user space copy of linux/coda.h before anyone can use that on a 32-bit architecture with 64-bit time_t. An open question is what should happen to actual times past y2038, as they are now truncated to the last valid date when sent to user space, and interpreted as pre-1970 times when a timestamp with the MSB set is read back into the kernel. Alternatively, we could change the new timespec64_to_coda()/coda_to_timespec64() functions to use a different interpretation and extend the available range further to the future by disallowing past timestamps. This would require more changes in the user space side though. Link: http://lkml.kernel.org/r/562b7324149461743e4fbe2fedbf7c242f7e274a.1558117389.git.jaharkes@cs.cmu.edu Link: https://patchwork.kernel.org/patch/10474735/ Signed-off-by: Arnd Bergmann Signed-off-by: Jan Harkes Acked-by: Jan Harkes Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Mikko Rapeli Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/coda.txt | 11 ++++++--- fs/coda/coda_linux.c | 50 +++++++++++++++++++++++++++++--------- include/uapi/linux/coda.h | 20 ++++++++++++--- 3 files changed, 62 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt index 61311356025d..ea5969068895 100644 --- a/Documentation/filesystems/coda.txt +++ b/Documentation/filesystems/coda.txt @@ -481,7 +481,10 @@ kernel support. - + struct vtimespec { + long tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ + }; struct coda_vattr { enum coda_vtype va_type; /* vnode type (for create) */ @@ -493,9 +496,9 @@ kernel support. long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ - struct timespec va_atime; /* time of last access */ - struct timespec va_mtime; /* time of last modification */ - struct timespec va_ctime; /* time file changed */ + struct vtimespec va_atime; /* time of last access */ + struct vtimespec va_mtime; /* time of last modification */ + struct vtimespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device special file represents */ diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index f3d543dd9a98..8addcd166908 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -66,6 +66,32 @@ unsigned short coda_flags_to_cflags(unsigned short flags) return coda_flags; } +static struct timespec64 coda_to_timespec64(struct vtimespec ts) +{ + /* + * We interpret incoming timestamps as 'signed' to match traditional + * usage and support pre-1970 timestamps, but this breaks in y2038 + * on 32-bit machines. + */ + struct timespec64 ts64 = { + .tv_sec = ts.tv_sec, + .tv_nsec = ts.tv_nsec, + }; + + return ts64; +} + +static struct vtimespec timespec64_to_coda(struct timespec64 ts64) +{ + /* clamp the timestamps to the maximum range rather than wrapping */ + struct vtimespec ts = { + .tv_sec = lower_32_bits(clamp_t(time64_t, ts64.tv_sec, + LONG_MIN, LONG_MAX)), + .tv_nsec = ts64.tv_nsec, + }; + + return ts; +} /* utility functions below */ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) @@ -105,11 +131,11 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) if (attr->va_size != -1) inode->i_blocks = (attr->va_size + 511) >> 9; if (attr->va_atime.tv_sec != -1) - inode->i_atime = timespec_to_timespec64(attr->va_atime); + inode->i_atime = coda_to_timespec64(attr->va_atime); if (attr->va_mtime.tv_sec != -1) - inode->i_mtime = timespec_to_timespec64(attr->va_mtime); + inode->i_mtime = coda_to_timespec64(attr->va_mtime); if (attr->va_ctime.tv_sec != -1) - inode->i_ctime = timespec_to_timespec64(attr->va_ctime); + inode->i_ctime = coda_to_timespec64(attr->va_ctime); } @@ -130,12 +156,12 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; - vattr->va_atime.tv_sec = (time_t) -1; - vattr->va_atime.tv_nsec = (time_t) -1; - vattr->va_mtime.tv_sec = (time_t) -1; - vattr->va_mtime.tv_nsec = (time_t) -1; - vattr->va_ctime.tv_sec = (time_t) -1; - vattr->va_ctime.tv_nsec = (time_t) -1; + vattr->va_atime.tv_sec = (long) -1; + vattr->va_atime.tv_nsec = (long) -1; + vattr->va_mtime.tv_sec = (long) -1; + vattr->va_mtime.tv_nsec = (long) -1; + vattr->va_ctime.tv_sec = (long) -1; + vattr->va_ctime.tv_nsec = (long) -1; vattr->va_type = C_VNON; vattr->va_fileid = -1; vattr->va_gen = -1; @@ -175,13 +201,13 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_size = iattr->ia_size; } if ( valid & ATTR_ATIME ) { - vattr->va_atime = timespec64_to_timespec(iattr->ia_atime); + vattr->va_atime = timespec64_to_coda(iattr->ia_atime); } if ( valid & ATTR_MTIME ) { - vattr->va_mtime = timespec64_to_timespec(iattr->ia_mtime); + vattr->va_mtime = timespec64_to_coda(iattr->ia_mtime); } if ( valid & ATTR_CTIME ) { - vattr->va_ctime = timespec64_to_timespec(iattr->ia_ctime); + vattr->va_ctime = timespec64_to_coda(iattr->ia_ctime); } } diff --git a/include/uapi/linux/coda.h b/include/uapi/linux/coda.h index ed8cb263e482..fc5f7874208a 100644 --- a/include/uapi/linux/coda.h +++ b/include/uapi/linux/coda.h @@ -211,6 +211,20 @@ struct CodaFid { */ enum coda_vtype { C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD }; +#ifdef __linux__ +/* + * This matches the traditional Linux 'timespec' structure binary layout, + * before using 64-bit time_t everywhere. Overflows in y2038 on 32-bit + * architectures. + */ +struct vtimespec { + long tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#else +#define vtimespec timespec +#endif + struct coda_vattr { long va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ @@ -220,9 +234,9 @@ struct coda_vattr { long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ - struct timespec va_atime; /* time of last access */ - struct timespec va_mtime; /* time of last modification */ - struct timespec va_ctime; /* time file changed */ + struct vtimespec va_atime; /* time of last access */ + struct vtimespec va_mtime; /* time of last modification */ + struct vtimespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ cdev_t va_rdev; /* device special file represents */ -- cgit v1.2.3 From 5e7c31dfe74703f428220384b2863525957cc160 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Tue, 16 Jul 2019 16:28:35 -0700 Subject: coda: change Coda's user api to use 64-bit time_t in timespec Move the 32-bit time_t problems to userspace. Link: http://lkml.kernel.org/r/8d089068823bfb292a4020f773922fbd82ffad39.1558117389.git.jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Arnd Bergmann Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Mikko Rapeli Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/coda.txt | 10 +++++----- fs/coda/coda_linux.c | 21 +++++++-------------- include/uapi/linux/coda.h | 33 +++++++-------------------------- 3 files changed, 19 insertions(+), 45 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt index ea5969068895..545262c167c3 100644 --- a/Documentation/filesystems/coda.txt +++ b/Documentation/filesystems/coda.txt @@ -481,8 +481,8 @@ kernel support. - struct vtimespec { - long tv_sec; /* seconds */ + struct coda_timespec { + int64_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; @@ -496,9 +496,9 @@ kernel support. long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ - struct vtimespec va_atime; /* time of last access */ - struct vtimespec va_mtime; /* time of last modification */ - struct vtimespec va_ctime; /* time file changed */ + struct coda_timespec va_atime; /* time of last access */ + struct coda_timespec va_mtime; /* time of last modification */ + struct coda_timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device special file represents */ diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 8addcd166908..e4b5f02f0dd4 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -66,13 +66,8 @@ unsigned short coda_flags_to_cflags(unsigned short flags) return coda_flags; } -static struct timespec64 coda_to_timespec64(struct vtimespec ts) +static struct timespec64 coda_to_timespec64(struct coda_timespec ts) { - /* - * We interpret incoming timestamps as 'signed' to match traditional - * usage and support pre-1970 timestamps, but this breaks in y2038 - * on 32-bit machines. - */ struct timespec64 ts64 = { .tv_sec = ts.tv_sec, .tv_nsec = ts.tv_nsec, @@ -81,12 +76,10 @@ static struct timespec64 coda_to_timespec64(struct vtimespec ts) return ts64; } -static struct vtimespec timespec64_to_coda(struct timespec64 ts64) +static struct coda_timespec timespec64_to_coda(struct timespec64 ts64) { - /* clamp the timestamps to the maximum range rather than wrapping */ - struct vtimespec ts = { - .tv_sec = lower_32_bits(clamp_t(time64_t, ts64.tv_sec, - LONG_MIN, LONG_MAX)), + struct coda_timespec ts = { + .tv_sec = ts64.tv_sec, .tv_nsec = ts64.tv_nsec, }; @@ -156,11 +149,11 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; - vattr->va_atime.tv_sec = (long) -1; + vattr->va_atime.tv_sec = (int64_t) -1; vattr->va_atime.tv_nsec = (long) -1; - vattr->va_mtime.tv_sec = (long) -1; + vattr->va_mtime.tv_sec = (int64_t) -1; vattr->va_mtime.tv_nsec = (long) -1; - vattr->va_ctime.tv_sec = (long) -1; + vattr->va_ctime.tv_sec = (int64_t) -1; vattr->va_ctime.tv_nsec = (long) -1; vattr->va_type = C_VNON; vattr->va_fileid = -1; diff --git a/include/uapi/linux/coda.h b/include/uapi/linux/coda.h index fc5f7874208a..5dba636b6e11 100644 --- a/include/uapi/linux/coda.h +++ b/include/uapi/linux/coda.h @@ -86,10 +86,6 @@ typedef unsigned long long u_quad_t; #define inline -struct timespec { - long ts_sec; - long ts_nsec; -}; #else /* DJGPP but not KERNEL */ #include typedef unsigned long long u_quad_t; @@ -110,13 +106,6 @@ typedef unsigned long long u_quad_t; #define cdev_t dev_t #endif -#ifdef __CYGWIN32__ -struct timespec { - time_t tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; -#endif - #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ typedef signed char int8_t; @@ -211,19 +200,10 @@ struct CodaFid { */ enum coda_vtype { C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD }; -#ifdef __linux__ -/* - * This matches the traditional Linux 'timespec' structure binary layout, - * before using 64-bit time_t everywhere. Overflows in y2038 on 32-bit - * architectures. - */ -struct vtimespec { - long tv_sec; /* seconds */ +struct coda_timespec { + int64_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; -#else -#define vtimespec timespec -#endif struct coda_vattr { long va_type; /* vnode type (for create) */ @@ -234,9 +214,9 @@ struct coda_vattr { long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ - struct vtimespec va_atime; /* time of last access */ - struct vtimespec va_mtime; /* time of last modification */ - struct vtimespec va_ctime; /* time file changed */ + struct coda_timespec va_atime; /* time of last access */ + struct coda_timespec va_mtime; /* time of last modification */ + struct coda_timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ cdev_t va_rdev; /* device special file represents */ @@ -301,7 +281,8 @@ struct coda_statfs { #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t) -#define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ +// CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ +#define CODA_KERNEL_VERSION 4 /* 64-bit timespec */ /* * Venus <-> Coda RPC arguments -- cgit v1.2.3 From 6dc280ebeed2c96a2fb933103dafe655a922b9c1 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Tue, 16 Jul 2019 16:28:51 -0700 Subject: coda: remove uapi/linux/coda_psdev.h Nothing is left in this header that is used by userspace. Link: http://lkml.kernel.org/r/bb11378cef94739f2cf89425dd6d302a52c64480.1558117389.git.jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Arnd Bergmann Cc: Colin Ian King Cc: Dan Carpenter Cc: David Howells Cc: Fabian Frederick Cc: Mikko Rapeli Cc: Sam Protsenko Cc: Yann Droneaud Cc: Zhouyang Jia Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/coda_psdev.h | 5 ++++- include/uapi/linux/coda_psdev.h | 10 ---------- 2 files changed, 4 insertions(+), 11 deletions(-) delete mode 100644 include/uapi/linux/coda_psdev.h (limited to 'include/uapi') diff --git a/fs/coda/coda_psdev.h b/fs/coda/coda_psdev.h index 012e16f741a6..801423cbbdfc 100644 --- a/fs/coda/coda_psdev.h +++ b/fs/coda/coda_psdev.h @@ -3,8 +3,11 @@ #define __CODA_PSDEV_H #include +#include #include -#include + +#define CODA_PSDEV_MAJOR 67 +#define MAX_CODADEVS 5 /* how many do we allow */ struct kstatfs; diff --git a/include/uapi/linux/coda_psdev.h b/include/uapi/linux/coda_psdev.h deleted file mode 100644 index 3dacb7fad66a..000000000000 --- a/include/uapi/linux/coda_psdev.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__CODA_PSDEV_H -#define _UAPI__CODA_PSDEV_H - -#include - -#define CODA_PSDEV_MAJOR 67 -#define MAX_CODADEVS 5 /* how many do we allow */ - -#endif /* _UAPI__CODA_PSDEV_H */ -- cgit v1.2.3 From a9fba24c6ac9b66c09dfc2a0e845ecace187e89c Mon Sep 17 00:00:00 2001 From: Pedro Cuadra Date: Tue, 16 Jul 2019 16:29:13 -0700 Subject: coda: add hinting support for partial file caching This adds support for partial file caching in Coda. Every read, write and mmap informs the userspace cache manager about what part of a file is about to be accessed so that the cache manager can ensure the relevant parts are available before the operation is allowed to proceed. When a read or write operation completes, this is also reported to allow the cache manager to track when partially cached content can be released. If the cache manager does not support partial file caching, or when the entire file has been fetched into the local cache, the cache manager may return an EOPNOTSUPP error to indicate that intent upcalls are no longer necessary until the file is closed. [akpm@linux-foundation.org: little whitespace fixup] Link: http://lkml.kernel.org/r/20190618181301.6960-1-jaharkes@cs.cmu.edu Signed-off-by: Pedro Cuadra Signed-off-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/coda_fs_i.h | 1 + fs/coda/coda_psdev.h | 3 ++ fs/coda/file.c | 61 +++++++++++++++++++++++++++++++++-------- fs/coda/psdev.c | 2 +- fs/coda/upcall.c | 70 +++++++++++++++++++++++++++++++++++++++-------- include/uapi/linux/coda.h | 29 ++++++++++++++++++-- 6 files changed, 139 insertions(+), 27 deletions(-) (limited to 'include/uapi') diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index c99d574d1c43..1763ff95d865 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -40,6 +40,7 @@ struct coda_file_info { int cfi_magic; /* magic number */ struct file *cfi_container; /* container file for this cnode */ unsigned int cfi_mapcount; /* nr of times this file is mapped */ + bool cfi_access_intent; /* is access intent supported */ }; /* flags */ diff --git a/fs/coda/coda_psdev.h b/fs/coda/coda_psdev.h index 801423cbbdfc..52da08c770b0 100644 --- a/fs/coda/coda_psdev.h +++ b/fs/coda/coda_psdev.h @@ -83,6 +83,9 @@ int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out, size_t nbytes); int venus_fsync(struct super_block *sb, struct CodaFid *fid); int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); +int venus_access_intent(struct super_block *sb, struct CodaFid *fid, + bool *access_intent_supported, + size_t count, loff_t ppos, int type); /* * Statistics diff --git a/fs/coda/file.c b/fs/coda/file.c index 0dbd13ab72e3..128d63df5bfb 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "coda_psdev.h" @@ -37,9 +38,25 @@ static ssize_t coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *coda_file = iocb->ki_filp; + struct inode *coda_inode = file_inode(coda_file); struct coda_file_info *cfi = coda_ftoc(coda_file); + loff_t ki_pos = iocb->ki_pos; + size_t count = iov_iter_count(to); + ssize_t ret; + + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_READ); + if (ret) + goto finish_read; - return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0); + ret = vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0); + +finish_read: + venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_READ_FINISH); + return ret; } static ssize_t @@ -48,10 +65,17 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) struct file *coda_file = iocb->ki_filp; struct inode *coda_inode = file_inode(coda_file); struct coda_file_info *cfi = coda_ftoc(coda_file); - struct file *host_file; + struct file *host_file = cfi->cfi_container; + loff_t ki_pos = iocb->ki_pos; + size_t count = iov_iter_count(to); ssize_t ret; - host_file = cfi->cfi_container; + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_WRITE); + if (ret) + goto finish_write; + file_start_write(host_file); inode_lock(coda_inode); ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0); @@ -60,6 +84,11 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode); inode_unlock(coda_inode); file_end_write(host_file); + +finish_write: + venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_WRITE_FINISH); return ret; } @@ -94,29 +123,35 @@ coda_vm_close(struct vm_area_struct *vma) static int coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) { - struct coda_file_info *cfi; + struct inode *coda_inode = file_inode(coda_file); + struct coda_file_info *cfi = coda_ftoc(coda_file); + struct file *host_file = cfi->cfi_container; + struct inode *host_inode = file_inode(host_file); struct coda_inode_info *cii; - struct file *host_file; - struct inode *coda_inode, *host_inode; struct coda_vm_ops *cvm_ops; + loff_t ppos; + size_t count; int ret; - cfi = coda_ftoc(coda_file); - host_file = cfi->cfi_container; - if (!host_file->f_op->mmap) return -ENODEV; if (WARN_ON(coda_file != vma->vm_file)) return -EIO; + count = vma->vm_end - vma->vm_start; + ppos = vma->vm_pgoff * PAGE_SIZE; + + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ppos, CODA_ACCESS_TYPE_MMAP); + if (ret) + return ret; + cvm_ops = kmalloc(sizeof(struct coda_vm_ops), GFP_KERNEL); if (!cvm_ops) return -ENOMEM; - coda_inode = file_inode(coda_file); - host_inode = file_inode(host_file); - cii = ITOC(coda_inode); spin_lock(&cii->c_lock); coda_file->f_mapping = host_file->f_mapping; @@ -188,6 +223,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) cfi->cfi_magic = CODA_MAGIC; cfi->cfi_mapcount = 0; cfi->cfi_container = host_file; + /* assume access intents are supported unless we hear otherwise */ + cfi->cfi_access_intent = true; BUG_ON(coda_file->private_data != NULL); coda_file->private_data = cfi; diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index ebfbbea9fa48..240669f51eac 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -388,7 +388,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -MODULE_VERSION("6.11"); +MODULE_VERSION("7.0"); static int __init init_coda(void) { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 15c0e4fdb0e3..eb3b1898da46 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -569,6 +569,47 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) return error; } +int venus_access_intent(struct super_block *sb, struct CodaFid *fid, + bool *access_intent_supported, + size_t count, loff_t ppos, int type) +{ + union inputArgs *inp; + union outputArgs *outp; + int insize, outsize, error; + bool finalizer = + type == CODA_ACCESS_TYPE_READ_FINISH || + type == CODA_ACCESS_TYPE_WRITE_FINISH; + + if (!*access_intent_supported && !finalizer) + return 0; + + insize = SIZE(access_intent); + UPARG(CODA_ACCESS_INTENT); + + inp->coda_access_intent.VFid = *fid; + inp->coda_access_intent.count = count; + inp->coda_access_intent.pos = ppos; + inp->coda_access_intent.type = type; + + error = coda_upcall(coda_vcp(sb), insize, + finalizer ? NULL : &outsize, inp); + + /* + * we have to free the request buffer for synchronous upcalls + * or when asynchronous upcalls fail, but not when asynchronous + * upcalls succeed + */ + if (!finalizer || error) + kvfree(inp); + + /* Chunked access is not supported or an old Coda client */ + if (error == -EOPNOTSUPP) { + *access_intent_supported = false; + error = 0; + } + return error; +} + /* * coda_upcall and coda_downcall routines. */ @@ -598,10 +639,12 @@ static void coda_unblock_signals(sigset_t *old) * has seen them, * - CODA_CLOSE or CODA_RELEASE upcall (to avoid reference count problems) * - CODA_STORE (to avoid data loss) + * - CODA_ACCESS_INTENT (to avoid reference count problems) */ #define CODA_INTERRUPTIBLE(r) (!coda_hard && \ (((r)->uc_opcode != CODA_CLOSE && \ (r)->uc_opcode != CODA_STORE && \ + (r)->uc_opcode != CODA_ACCESS_INTENT && \ (r)->uc_opcode != CODA_RELEASE) || \ (r)->uc_flags & CODA_REQ_READ)) @@ -687,21 +730,25 @@ static int coda_upcall(struct venus_comm *vcp, goto exit; } + buffer->ih.unique = ++vcp->vc_seq; + req->uc_data = (void *)buffer; - req->uc_flags = 0; + req->uc_flags = outSize ? 0 : CODA_REQ_ASYNC; req->uc_inSize = inSize; - req->uc_outSize = *outSize ? *outSize : inSize; - req->uc_opcode = ((union inputArgs *)buffer)->ih.opcode; - req->uc_unique = ++vcp->vc_seq; + req->uc_outSize = (outSize && *outSize) ? *outSize : inSize; + req->uc_opcode = buffer->ih.opcode; + req->uc_unique = buffer->ih.unique; init_waitqueue_head(&req->uc_sleep); - /* Fill in the common input args. */ - ((union inputArgs *)buffer)->ih.unique = req->uc_unique; - /* Append msg to pending queue and poke Venus. */ list_add_tail(&req->uc_chain, &vcp->vc_pending); - wake_up_interruptible(&vcp->vc_waitq); + + if (req->uc_flags & CODA_REQ_ASYNC) { + mutex_unlock(&vcp->vc_mutex); + return 0; + } + /* We can be interrupted while we wait for Venus to process * our request. If the interrupt occurs before Venus has read * the request, we dequeue and return. If it occurs after the @@ -743,20 +790,20 @@ static int coda_upcall(struct venus_comm *vcp, sig_req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); if (!sig_req) goto exit; - sig_req->uc_data = kvzalloc(sizeof(struct coda_in_hdr), GFP_KERNEL); - if (!sig_req->uc_data) { + sig_inputArgs = kvzalloc(sizeof(struct coda_in_hdr), GFP_KERNEL); + if (!sig_inputArgs) { kfree(sig_req); goto exit; } error = -EINTR; - sig_inputArgs = (union inputArgs *)sig_req->uc_data; sig_inputArgs->ih.opcode = CODA_SIGNAL; sig_inputArgs->ih.unique = req->uc_unique; sig_req->uc_flags = CODA_REQ_ASYNC; sig_req->uc_opcode = sig_inputArgs->ih.opcode; sig_req->uc_unique = sig_inputArgs->ih.unique; + sig_req->uc_data = (void *)sig_inputArgs; sig_req->uc_inSize = sizeof(struct coda_in_hdr); sig_req->uc_outSize = sizeof(struct coda_in_hdr); @@ -911,4 +958,3 @@ unlock_out: iput(inode); return 0; } - diff --git a/include/uapi/linux/coda.h b/include/uapi/linux/coda.h index 5dba636b6e11..aa34c2dcae8d 100644 --- a/include/uapi/linux/coda.h +++ b/include/uapi/linux/coda.h @@ -271,7 +271,8 @@ struct coda_statfs { #define CODA_STATFS 34 #define CODA_STORE 35 #define CODA_RELEASE 36 -#define CODA_NCALLS 37 +#define CODA_ACCESS_INTENT 37 +#define CODA_NCALLS 38 #define DOWNCALL(opcode) (opcode >= CODA_REPLACE && opcode <= CODA_PURGEFID) @@ -281,8 +282,12 @@ struct coda_statfs { #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t) +// CODA_KERNEL_VERSION 0 /* don't care about kernel version number */ +// CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */ +// CODA_KERNEL_VERSION 2 /* venus_lookup gets an extra parameter */ // CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ -#define CODA_KERNEL_VERSION 4 /* 64-bit timespec */ +// CODA_KERNEL_VERSION 4 /* 64-bit timespec */ +#define CODA_KERNEL_VERSION 5 /* access intent support */ /* * Venus <-> Coda RPC arguments @@ -637,6 +642,25 @@ struct coda_statfs_out { struct coda_statfs stat; }; +#define CODA_ACCESS_TYPE_READ 1 +#define CODA_ACCESS_TYPE_WRITE 2 +#define CODA_ACCESS_TYPE_MMAP 3 +#define CODA_ACCESS_TYPE_READ_FINISH 4 +#define CODA_ACCESS_TYPE_WRITE_FINISH 5 + +/* coda_access_intent: NO_OUT */ +struct coda_access_intent_in { + struct coda_in_hdr ih; + struct CodaFid VFid; + int count; + int pos; + int type; +}; + +struct coda_access_intent_out { + struct coda_out_hdr out; +}; + /* * Occasionally, we don't cache the fid returned by CODA_LOOKUP. * For instance, if the fid is inconsistent. @@ -668,6 +692,7 @@ union inputArgs { struct coda_open_by_fd_in coda_open_by_fd; struct coda_open_by_path_in coda_open_by_path; struct coda_statfs_in coda_statfs; + struct coda_access_intent_in coda_access_intent; }; union outputArgs { -- cgit v1.2.3 From 201766a20e30f982ccfe36bebfad9602c3ff574a Mon Sep 17 00:00:00 2001 From: Elvira Khabirova Date: Tue, 16 Jul 2019 16:29:42 -0700 Subject: ptrace: add PTRACE_GET_SYSCALL_INFO request PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain details of the syscall the tracee is blocked in. There are two reasons for a special syscall-related ptrace request. Firstly, with the current ptrace API there are cases when ptracer cannot retrieve necessary information about syscalls. Some examples include: * The notorious int-0x80-from-64-bit-task issue. See [1] for details. In short, if a 64-bit task performs a syscall through int 0x80, its tracer has no reliable means to find out that the syscall was, in fact, a compat syscall, and misidentifies it. * Syscall-enter-stop and syscall-exit-stop look the same for the tracer. Common practice is to keep track of the sequence of ptrace-stops in order not to mix the two syscall-stops up. But it is not as simple as it looks; for example, strace had a (just recently fixed) long-standing bug where attaching strace to a tracee that is performing the execve system call led to the tracer identifying the following syscall-exit-stop as syscall-enter-stop, which messed up all the state tracking. * Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow accessing an undumpable mm"), both PTRACE_PEEKDATA and process_vm_readv become unavailable when the process dumpable flag is cleared. On such architectures as ia64 this results in all syscall arguments being unavailable for the tracer. Secondly, ptracers also have to support a lot of arch-specific code for obtaining information about the tracee. For some architectures, this requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall argument and return value. ptrace(2) man page: long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); ... PTRACE_GET_SYSCALL_INFO Retrieve information about the syscall that caused the stop. The information is placed into the buffer pointed by "data" argument, which should be a pointer to a buffer of type "struct ptrace_syscall_info". The "addr" argument contains the size of the buffer pointed to by "data" argument (i.e., sizeof(struct ptrace_syscall_info)). The return value contains the number of bytes available to be written by the kernel. If the size of data to be written by the kernel exceeds the size specified by "addr" argument, the output is truncated. [ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO] Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org Signed-off-by: Elvira Khabirova Co-developed-by: Dmitry V. Levin Signed-off-by: Dmitry V. Levin Reviewed-by: Oleg Nesterov Reviewed-by: Kees Cook Reviewed-by: Andy Lutomirski Cc: Eugene Syromyatnikov Cc: Benjamin Herrenschmidt Cc: Greentime Hu Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: James Hogan Cc: kbuild test robot Cc: Michael Ellerman Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Kuo Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 9 ++- include/uapi/linux/ptrace.h | 35 +++++++++ kernel/ptrace.c | 101 +++++++++++++++++++++++++- tools/testing/selftests/seccomp/seccomp_bpf.c | 13 +++- 4 files changed, 150 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 8446573cc682..36fb3bbed6b2 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -54,13 +54,15 @@ struct linux_binprm; /* * ptrace report for syscall entry and exit looks identical. */ -static inline int ptrace_report_syscall(struct pt_regs *regs) +static inline int ptrace_report_syscall(struct pt_regs *regs, + unsigned long message) { int ptrace = current->ptrace; if (!(ptrace & PT_PTRACED)) return 0; + current->ptrace_message = message; ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); /* @@ -73,6 +75,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs) current->exit_code = 0; } + current->ptrace_message = 0; return fatal_signal_pending(current); } @@ -98,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs) static inline __must_check int tracehook_report_syscall_entry( struct pt_regs *regs) { - return ptrace_report_syscall(regs); + return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY); } /** @@ -123,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) if (step) user_single_step_report(regs); else - ptrace_report_syscall(regs); + ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT); } /** diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index d5a1b8a492b9..a71b6e3b03eb 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -73,6 +73,41 @@ struct seccomp_metadata { __u64 flags; /* Output: filter's flags */ }; +#define PTRACE_GET_SYSCALL_INFO 0x420e +#define PTRACE_SYSCALL_INFO_NONE 0 +#define PTRACE_SYSCALL_INFO_ENTRY 1 +#define PTRACE_SYSCALL_INFO_EXIT 2 +#define PTRACE_SYSCALL_INFO_SECCOMP 3 + +struct ptrace_syscall_info { + __u8 op; /* PTRACE_SYSCALL_INFO_* */ + __u32 arch __attribute__((__aligned__(sizeof(__u32)))); + __u64 instruction_pointer; + __u64 stack_pointer; + union { + struct { + __u64 nr; + __u64 args[6]; + } entry; + struct { + __s64 rval; + __u8 is_error; + } exit; + struct { + __u64 nr; + __u64 args[6]; + __u32 ret_data; + } seccomp; + }; +}; + +/* + * These values are stored in task->ptrace_message + * by tracehook_report_syscall_* to describe the current syscall-stop. + */ +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 +#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 + /* Read signals from a shared (process wide) queue */ #define PTRACE_PEEKSIGINFO_SHARED (1 << 0) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 83a531cea2f3..cb9ddcc08119 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -32,6 +32,8 @@ #include #include +#include /* for syscall_get_* */ + /* * Access another process' address space via ptrace. * Source/target buffer must be kernel space, @@ -897,7 +899,100 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, * to ensure no machine forgets it. */ EXPORT_SYMBOL_GPL(task_user_regset_view); -#endif + +static unsigned long +ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + unsigned long args[ARRAY_SIZE(info->entry.args)]; + int i; + + info->op = PTRACE_SYSCALL_INFO_ENTRY; + info->entry.nr = syscall_get_nr(child, regs); + syscall_get_arguments(child, regs, args); + for (i = 0; i < ARRAY_SIZE(args); i++) + info->entry.args[i] = args[i]; + + /* args is the last field in struct ptrace_syscall_info.entry */ + return offsetofend(struct ptrace_syscall_info, entry.args); +} + +static unsigned long +ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + /* + * As struct ptrace_syscall_info.entry is currently a subset + * of struct ptrace_syscall_info.seccomp, it makes sense to + * initialize that subset using ptrace_get_syscall_info_entry(). + * This can be reconsidered in the future if these structures + * diverge significantly enough. + */ + ptrace_get_syscall_info_entry(child, regs, info); + info->op = PTRACE_SYSCALL_INFO_SECCOMP; + info->seccomp.ret_data = child->ptrace_message; + + /* ret_data is the last field in struct ptrace_syscall_info.seccomp */ + return offsetofend(struct ptrace_syscall_info, seccomp.ret_data); +} + +static unsigned long +ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, + struct ptrace_syscall_info *info) +{ + info->op = PTRACE_SYSCALL_INFO_EXIT; + info->exit.rval = syscall_get_error(child, regs); + info->exit.is_error = !!info->exit.rval; + if (!info->exit.is_error) + info->exit.rval = syscall_get_return_value(child, regs); + + /* is_error is the last field in struct ptrace_syscall_info.exit */ + return offsetofend(struct ptrace_syscall_info, exit.is_error); +} + +static int +ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, + void __user *datavp) +{ + struct pt_regs *regs = task_pt_regs(child); + struct ptrace_syscall_info info = { + .op = PTRACE_SYSCALL_INFO_NONE, + .arch = syscall_get_arch(child), + .instruction_pointer = instruction_pointer(regs), + .stack_pointer = user_stack_pointer(regs), + }; + unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); + unsigned long write_size; + + /* + * This does not need lock_task_sighand() to access + * child->last_siginfo because ptrace_freeze_traced() + * called earlier by ptrace_check_attach() ensures that + * the tracee cannot go away and clear its last_siginfo. + */ + switch (child->last_siginfo ? child->last_siginfo->si_code : 0) { + case SIGTRAP | 0x80: + switch (child->ptrace_message) { + case PTRACE_EVENTMSG_SYSCALL_ENTRY: + actual_size = ptrace_get_syscall_info_entry(child, regs, + &info); + break; + case PTRACE_EVENTMSG_SYSCALL_EXIT: + actual_size = ptrace_get_syscall_info_exit(child, regs, + &info); + break; + } + break; + case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8): + actual_size = ptrace_get_syscall_info_seccomp(child, regs, + &info); + break; + } + + write_size = min(actual_size, user_size); + return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size; +} +#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data) @@ -1114,6 +1209,10 @@ int ptrace_request(struct task_struct *child, long request, ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } + + case PTRACE_GET_SYSCALL_INFO: + ret = ptrace_get_syscall_info(child, addr, datavp); + break; #endif case PTRACE_SECCOMP_GET_FILTER: diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index dc66fe852768..6ef7f16c4cf5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1775,13 +1775,18 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, unsigned long msg; static bool entry; - /* Make sure we got an empty message. */ + /* + * The traditional way to tell PTRACE_SYSCALL entry/exit + * is by counting. + */ + entry = !entry; + + /* Make sure we got an appropriate message. */ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); EXPECT_EQ(0, ret); - EXPECT_EQ(0, msg); + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */ - entry = !entry; if (!entry) return; -- cgit v1.2.3 From 22fcea6f85f2cc74e61bd8b3640faa8467553c24 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Jul 2019 16:30:38 -0700 Subject: mm: move MAP_SYNC to asm-generic/mman-common.h This enables support for synchronous DAX fault on powerpc The generic changes are added as part of b6fb293f2497 ("mm: Define MAP_SYNC and VM_SYNC flags") Without this, mmap returns EOPNOTSUPP for MAP_SYNC with MAP_SHARED_VALIDATE Instead of adding MAP_SYNC with same value to arch/powerpc/include/uapi/asm/mman.h, I am moving the #define to asm-generic/mman-common.h. Two architectures using mman-common.h directly are sparc and powerpc. We should be able to consloidate more #defines to mman-common.h. That can be done as a separate patch. Link: http://lkml.kernel.org/r/20190528091120.13322-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: Jan Kara Cc: Michael Ellerman Cc: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 3 ++- include/uapi/asm-generic/mman.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index cb556b430e71..93a8b420ce1e 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -20,7 +20,8 @@ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +/* 0x0100 - 0x40000 flags are defined in asm-generic/mman.h */ +#define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 653687d9771b..2dffcbf705b3 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -13,7 +13,6 @@ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ -- cgit v1.2.3 From 8aa3c927ec10d1230c3ace8357f624479665f701 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Jul 2019 16:30:41 -0700 Subject: mm/mmap: move common defines to mman-common.h Two architecture that use arch specific MMAP flags are powerpc and sparc. We still have few flag values common across them and other architectures. Consolidate this in mman-common.h. Also update the comment to indicate where to find HugeTLB specific reserved values Link: http://lkml.kernel.org/r/20190604090950.31417-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/uapi/asm/mman.h | 6 +----- arch/sparc/include/uapi/asm/mman.h | 6 ------ include/uapi/asm-generic/mman-common.h | 6 +++++- include/uapi/asm-generic/mman.h | 9 ++++----- 4 files changed, 10 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 65065ce32814..c0c737215b00 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -21,15 +21,11 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - /* Override any generic PKEY permission defines */ #define PKEY_DISABLE_EXECUTE 0x4 #undef PKEY_ACCESS_MASK diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index f6f99ec65bb3..cec9f4109687 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -22,10 +22,4 @@ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - - #endif /* _UAPI__SPARC_MMAN_H__ */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 93a8b420ce1e..63b1f506ea67 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -20,7 +20,11 @@ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -/* 0x0100 - 0x40000 flags are defined in asm-generic/mman.h */ +/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ +#define MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x010000 /* do not block on IO */ +#define MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x040000 /* create a huge page mapping */ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 2dffcbf705b3..57e8195d0b53 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -9,12 +9,11 @@ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ +/* + * Bits [26:31] are reserved, see asm-generic/hugetlb_encode.h + * for MAP_HUGETLB usage + */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -- cgit v1.2.3