30 files changed, 746 insertions, 1077 deletions
| diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 5da47e26a012..8114744bf30c 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,  		return 0;  	/* Try lock this page */ -	if (buf->ops->steal(pipe, buf) == 0) { +	if (pipe_buf_steal(pipe, buf) == 0) {  		/* Get reference and unlock page for moving */  		get_page(buf->page);  		unlock_page(buf->page); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 6e3a188baaae..d56863ff5866 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1138,45 +1138,31 @@ restart:  			range_lock_init(&range, *ppos, *ppos + count - 1);  		vio->vui_fd  = LUSTRE_FPRIVATE(file); -		vio->vui_io_subtype = args->via_io_subtype; +		vio->vui_iter = args->u.normal.via_iter; +		vio->vui_iocb = args->u.normal.via_iocb; +		/* +		 * Direct IO reads must also take range lock, +		 * or multiple reads will try to work on the same pages +		 * See LU-6227 for details. +		 */ +		if (((iot == CIT_WRITE) || +		     (iot == CIT_READ && (file->f_flags & O_DIRECT))) && +		    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { +			CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", +			       range.rl_node.in_extent.start, +			       range.rl_node.in_extent.end); +			result = range_lock(&lli->lli_write_tree, +					    &range); +			if (result < 0) +				goto out; -		switch (vio->vui_io_subtype) { -		case IO_NORMAL: -			vio->vui_iter = args->u.normal.via_iter; -			vio->vui_iocb = args->u.normal.via_iocb; -			/* -			 * Direct IO reads must also take range lock, -			 * or multiple reads will try to work on the same pages -			 * See LU-6227 for details. 
-			 */ -			if (((iot == CIT_WRITE) || -			     (iot == CIT_READ && (file->f_flags & O_DIRECT))) && -			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { -				CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", -				       range.rl_node.in_extent.start, -				       range.rl_node.in_extent.end); -				result = range_lock(&lli->lli_write_tree, -						    &range); -				if (result < 0) -					goto out; - -				range_locked = true; -			} -			down_read(&lli->lli_trunc_sem); -			break; -		case IO_SPLICE: -			vio->u.splice.vui_pipe = args->u.splice.via_pipe; -			vio->u.splice.vui_flags = args->u.splice.via_flags; -			break; -		default: -			CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); -			LBUG(); +			range_locked = true;  		} +		down_read(&lli->lli_trunc_sem);  		ll_cl_add(file, env, io);  		result = cl_io_loop(env, io);  		ll_cl_remove(file, env); -		if (args->via_io_subtype == IO_NORMAL) -			up_read(&lli->lli_trunc_sem); +		up_read(&lli->lli_trunc_sem);  		if (range_locked) {  			CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",  			       range.rl_node.in_extent.start, @@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)  	if (IS_ERR(env))  		return PTR_ERR(env); -	args = ll_env_args(env, IO_NORMAL); +	args = ll_env_args(env);  	args->u.normal.via_iter = to;  	args->u.normal.via_iocb = iocb; @@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  	if (IS_ERR(env))  		return PTR_ERR(env); -	args = ll_env_args(env, IO_NORMAL); +	args = ll_env_args(env);  	args->u.normal.via_iter = from;  	args->u.normal.via_iocb = iocb; @@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  	return result;  } -/* - * Send file content (through pagecache) somewhere with helper - */ -static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, -				   struct pipe_inode_info *pipe, size_t count, -				   unsigned int flags) -{ -	struct lu_env      *env; -	struct vvp_io_args *args; -	ssize_t	     result; -	int		 refcheck; - -	env = cl_env_get(&refcheck); -	if (IS_ERR(env)) -		return PTR_ERR(env); - -	args = ll_env_args(env, IO_SPLICE); -	args->u.splice.via_pipe = pipe; -	args->u.splice.via_flags = flags; - -	result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); -	cl_env_put(env, &refcheck); -	return result; -} -  int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,  			     __u64 flags, struct lov_user_md *lum,  			     int lum_size) @@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = {  	.release	= ll_file_release,  	.mmap	   = ll_file_mmap,  	.llseek	 = ll_file_seek, -	.splice_read    = ll_file_splice_read, +	.splice_read    = generic_file_splice_read,  	.fsync	  = ll_fsync,  	.flush	  = ll_flush  }; @@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = {  	.release	= ll_file_release,  	.mmap	   = ll_file_mmap,  	.llseek	 = ll_file_seek, -	.splice_read    = ll_file_splice_read, +	.splice_read    = generic_file_splice_read,  	.fsync	  = ll_fsync,  	.flush	  = ll_flush,  	.flock	  = ll_file_flock, @@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = {  	.release	= ll_file_release,  	.mmap	   = ll_file_mmap,  	.llseek	 = ll_file_seek, -	.splice_read    = ll_file_splice_read, +	.splice_read    = generic_file_splice_read,  	.fsync	  = ll_fsync,  	.flush	  = ll_flush,  	.flock	  = ll_file_noflock, diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h 
b/drivers/staging/lustre/lustre/llite/llite_internal.h index 3e98bd685061..4bc551279aa4 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);   */  struct vvp_io_args {  	/** normal/splice */ -	enum vvp_io_subtype via_io_subtype; -  	union {  		struct {  			struct kiocb      *via_iocb;  			struct iov_iter   *via_iter;  		} normal; -		struct { -			struct pipe_inode_info  *via_pipe; -			unsigned int       via_flags; -		} splice;  	} u;  }; @@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)  	return lti;  } -static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, -					      enum vvp_io_subtype type) +static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)  { -	struct vvp_io_args *via = &ll_env_info(env)->lti_args; - -	via->via_io_subtype = type; - -	return via; +	return &ll_env_info(env)->lti_args;  }  void ll_queue_done_writing(struct inode *inode, unsigned long flags); diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index 5802da81cd0e..4464ad258387 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -49,14 +49,6 @@ struct obd_device;  struct obd_export;  struct page; -/* specific architecture can implement only part of this list */ -enum vvp_io_subtype { -	/** normal IO */ -	IO_NORMAL, -	/** io started from splice_{read|write} */ -	IO_SPLICE -}; -  /**   * IO state private to IO state private to VVP layer.   */ @@ -99,10 +91,6 @@ struct vvp_io {  			bool		ft_flags_valid;  		} fault;  		struct { -			struct pipe_inode_info	*vui_pipe; -			unsigned int		 vui_flags; -		} splice; -		struct {  			struct cl_page_list vui_queue;  			unsigned long vui_written;  			int vui_from; @@ -110,8 +98,6 @@ struct vvp_io {  		} write;  	} u; -	enum vvp_io_subtype	vui_io_subtype; -  	/**  	 * Layout version when this IO is initialized  	 */ diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 2ab450359b6d..2b7f182a15e2 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -54,18 +54,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env,  }  /** - * True, if \a io is a normal io, False for splice_{read,write} - */ -static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) -{ -	struct vvp_io *vio = vvp_env_io(env); - -	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - -	return vio->vui_io_subtype == IO_NORMAL; -} - -/**   * For swapping layout. The file's layout may have changed.   * To avoid populating pages to a wrong stripe, we have to verify the   * correctness of layout. 
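The lustre file_operations hunks above (and the matching coda, gfs2, nfs, ocfs2 and xfs hunks later in this diff) all make the same substitution: once splice_read is implemented on top of ->read_iter, a private wrapper adds nothing and .splice_read can point straight at generic_file_splice_read(). A minimal sketch of the resulting wiring for a hypothetical filesystem (the myfs_* name is a placeholder, not part of this patch); any locking the removed wrappers used to take is expected to be covered by the ->read_iter path that generic_file_splice_read() now calls into:

const struct file_operations myfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,	/* splice_read now rides on this */
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fsync		= noop_fsync,
};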
It works because swapping layout processes @@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env,  	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); -	if (!cl_is_normalio(env, io)) -		return 0; -  	if (!vio->vui_iter) /* nfs or loop back device write */  		return 0; @@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env,  			   const struct cl_io_slice *ios,  			   size_t nob)  { -	struct vvp_io    *vio = cl2vvp_io(env, ios); -	struct cl_io     *io  = ios->cis_io;  	struct cl_object *obj = ios->cis_io->ci_obj; - +	struct vvp_io	 *vio = cl2vvp_io(env, ios);  	CLOBINVRNT(env, obj, vvp_object_invariant(obj)); -	if (!cl_is_normalio(env, io)) -		return; -  	iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count  -= nob);  } @@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env,  {  	size_t size = io->u.ci_rw.crw_count; -	if (!cl_is_normalio(env, io) || !vio->vui_iter) +	if (!vio->vui_iter)  		return;  	iov_iter_truncate(vio->vui_iter, size); @@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env,  	/* BUG: 5972 */  	file_accessed(file); -	switch (vio->vui_io_subtype) { -	case IO_NORMAL: -		LASSERT(vio->vui_iocb->ki_pos == pos); -		result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); -		break; -	case IO_SPLICE: -		result = generic_file_splice_read(file, &pos, -						  vio->u.splice.vui_pipe, cnt, -						  vio->u.splice.vui_flags); -		/* LU-1109: do splice read stripe by stripe otherwise if it -		 * may make nfsd stuck if this read occupied all internal pipe -		 * buffers. -		 */ -		io->ci_continue = 0; -		break; -	default: -		CERROR("Wrong IO type %u\n", vio->vui_io_subtype); -		LBUG(); -	} +	LASSERT(vio->vui_iocb->ki_pos == pos); +	result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);  out:  	if (result >= 0) { diff --git a/fs/coda/file.c b/fs/coda/file.c index f47c7483863b..8415d4f8d1a1 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)  }  static ssize_t -coda_file_splice_read(struct file *coda_file, loff_t *ppos, -		      struct pipe_inode_info *pipe, size_t count, -		      unsigned int flags) -{ -	ssize_t (*splice_read)(struct file *, loff_t *, -			       struct pipe_inode_info *, size_t, unsigned int); -	struct coda_file_info *cfi; -	struct file *host_file; - -	cfi = CODA_FTOC(coda_file); -	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); -	host_file = cfi->cfi_container; - -	splice_read = host_file->f_op->splice_read; -	if (!splice_read) -		splice_read = default_file_splice_read; - -	return splice_read(host_file, ppos, pipe, count, flags); -} - -static ssize_t  coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)  {  	struct file *coda_file = iocb->ki_filp; @@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {  	.open		= coda_open,  	.release	= coda_release,  	.fsync		= coda_fsync, -	.splice_read	= coda_file_splice_read, +	.splice_read	= generic_file_splice_read,  }; diff --git a/fs/direct-io.c b/fs/direct-io.c index 7c3ce73cb617..fb9aa16a7727 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)  		if ((dio->op == REQ_OP_READ) &&  		    ((offset + transferred) > dio->i_size))  			transferred = dio->i_size - offset; +		/* ignore EFAULT if some IO has been done */ +		if (unlikely(ret == -EFAULT) && transferred) +			ret = 0;  	}  	if (ret == 0) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 
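The small fs/direct-io.c hunk above sets the return-value policy the rest of this series relies on: with a pipe-backed destination, ->read_iter can legitimately hit -EFAULT after some data has already been transferred (the pipe filled up), and the caller should then see the short count rather than the error. An illustrative helper capturing that rule (sketch only, not part of the patch):

/*
 * Illustrative only: mirrors the dio_complete() change above. Once any
 * data has reached the caller, prefer reporting the short transfer
 * over -EFAULT; other errors still take precedence.
 */
static ssize_t finish_partial_read(ssize_t err, size_t transferred)
{
	if (unlikely(err == -EFAULT) && transferred)
		err = 0;
	return err ? err : (ssize_t)transferred;
}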
c41bde26c338..70ea57c7b6bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)  		struct pipe_buffer *buf = cs->pipebufs;  		if (!cs->write) { -			err = buf->ops->confirm(cs->pipe, buf); +			err = pipe_buf_confirm(cs->pipe, buf);  			if (err)  				return err; @@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)  	fuse_copy_finish(cs); -	err = buf->ops->confirm(cs->pipe, buf); +	err = pipe_buf_confirm(cs->pipe, buf);  	if (err)  		return err; @@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)  	if (cs->len != PAGE_SIZE)  		goto out_fallback; -	if (buf->ops->steal(cs->pipe, buf) != 0) +	if (pipe_buf_steal(cs->pipe, buf) != 0)  		goto out_fallback;  	newpage = buf->page; @@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,  				    struct pipe_inode_info *pipe,  				    size_t len, unsigned int flags)  { -	int ret; +	int total, ret;  	int page_nr = 0; -	int do_wakeup = 0;  	struct pipe_buffer *bufs;  	struct fuse_copy_state cs;  	struct fuse_dev *fud = fuse_get_dev(in); @@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,  	if (ret < 0)  		goto out; -	ret = 0; -	pipe_lock(pipe); - -	if (!pipe->readers) { -		send_sig(SIGPIPE, current, 0); -		if (!ret) -			ret = -EPIPE; -		goto out_unlock; -	} -  	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {  		ret = -EIO; -		goto out_unlock; +		goto out;  	} -	while (page_nr < cs.nr_segs) { -		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); -		struct pipe_buffer *buf = pipe->bufs + newbuf; - -		buf->page = bufs[page_nr].page; -		buf->offset = bufs[page_nr].offset; -		buf->len = bufs[page_nr].len; +	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {  		/*  		 * Need to be careful about this.  Having buf->ops in module  		 * code can Oops if the buffer persists after module unload.  		 
*/ -		buf->ops = &nosteal_pipe_buf_ops; - -		pipe->nrbufs++; -		page_nr++; -		ret += buf->len; - -		if (pipe->files) -			do_wakeup = 1; -	} - -out_unlock: -	pipe_unlock(pipe); - -	if (do_wakeup) { -		smp_mb(); -		if (waitqueue_active(&pipe->wait)) -			wake_up_interruptible(&pipe->wait); -		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); +		bufs[page_nr].ops = &nosteal_pipe_buf_ops; +		ret = add_to_pipe(pipe, &bufs[page_nr++]); +		if (unlikely(ret < 0)) +			break;  	} - +	if (total) +		ret = total;  out:  	for (; page_nr < cs.nr_segs; page_nr++)  		put_page(bufs[page_nr].page); @@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,  			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);  			pipe->nrbufs--;  		} else { -			ibuf->ops->get(pipe, ibuf); +			pipe_buf_get(pipe, ibuf);  			*obuf = *ibuf;  			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;  			obuf->len = rem; @@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,  	ret = fuse_dev_do_write(fud, &cs, len); -	for (idx = 0; idx < nbuf; idx++) { -		struct pipe_buffer *buf = &bufs[idx]; -		buf->ops->release(pipe, buf); -	} +	for (idx = 0; idx < nbuf; idx++) +		pipe_buf_release(pipe, &bufs[idx]); +  out:  	kfree(bufs);  	return ret; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 360188f162bd..e23ff70b3435 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -954,30 +954,6 @@ out_uninit:  	return ret;  } -static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos, -				     struct pipe_inode_info *pipe, size_t len, -				     unsigned int flags) -{ -	struct inode *inode = in->f_mapping->host; -	struct gfs2_inode *ip = GFS2_I(inode); -	struct gfs2_holder gh; -	int ret; - -	inode_lock(inode); - -	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); -	if (ret) { -		inode_unlock(inode); -		return ret; -	} - -	gfs2_glock_dq_uninit(&gh); -	inode_unlock(inode); - -	return generic_file_splice_read(in, ppos, pipe, len, flags); -} - -  static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,  				      struct file *out, loff_t *ppos,  				      size_t len, unsigned int flags) @@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {  	.fsync		= gfs2_fsync,  	.lock		= gfs2_lock,  	.flock		= gfs2_flock, -	.splice_read	= gfs2_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= gfs2_file_splice_write,  	.setlease	= simple_nosetlease,  	.fallocate	= gfs2_fallocate, @@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {  	.open		= gfs2_open,  	.release	= gfs2_release,  	.fsync		= gfs2_fsync, -	.splice_read	= gfs2_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= gfs2_file_splice_write,  	.setlease	= generic_setlease,  	.fallocate	= gfs2_fallocate, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ca699ddc11c1..2efbdde36c3e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)  }  EXPORT_SYMBOL_GPL(nfs_file_read); -ssize_t -nfs_file_splice_read(struct file *filp, loff_t *ppos, -		     struct pipe_inode_info *pipe, size_t count, -		     unsigned int flags) -{ -	struct inode *inode = file_inode(filp); -	ssize_t res; - -	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", -		filp, (unsigned long) count, (unsigned long long) *ppos); - -	nfs_start_io_read(inode); -	res = nfs_revalidate_mapping(inode, filp->f_mapping); -	if (!res) { -		res = generic_file_splice_read(filp, ppos, pipe, count, flags); -		if (res > 0) -			
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); -	} -	nfs_end_io_read(inode); -	return res; -} -EXPORT_SYMBOL_GPL(nfs_file_splice_read); -  int  nfs_file_mmap(struct file * file, struct vm_area_struct * vma)  { @@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {  	.fsync		= nfs_file_fsync,  	.lock		= nfs_lock,  	.flock		= nfs_flock, -	.splice_read	= nfs_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.check_flags	= nfs_check_flags,  	.setlease	= simple_nosetlease, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 74935a19e4bf..d7b062bdc504 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)  int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);  loff_t nfs_file_llseek(struct file *, loff_t, int);  ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); -ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, -			     size_t, unsigned int);  int nfs_file_mmap(struct file *, struct vm_area_struct *);  ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);  int nfs_file_release(struct inode *, struct file *); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d085ad794884..89a77950e0b0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {  	.fsync		= nfs_file_fsync,  	.lock		= nfs_lock,  	.flock		= nfs_flock, -	.splice_read	= nfs_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.check_flags	= nfs_check_flags,  	.setlease	= simple_nosetlease, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0b055bfb8e86..8f91639f8364 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2321,36 +2321,6 @@ out_mutex:  	return ret;  } -static ssize_t ocfs2_file_splice_read(struct file *in, -				      loff_t *ppos, -				      struct pipe_inode_info *pipe, -				      size_t len, -				      unsigned int flags) -{ -	int ret = 0, lock_level = 0; -	struct inode *inode = file_inode(in); - -	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, -			(unsigned long long)OCFS2_I(inode)->ip_blkno, -			in->f_path.dentry->d_name.len, -			in->f_path.dentry->d_name.name, len); - -	/* -	 * See the comment in ocfs2_file_read_iter() -	 */ -	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); -	if (ret < 0) { -		mlog_errno(ret); -		goto bail; -	} -	ocfs2_inode_unlock(inode, lock_level); - -	ret = generic_file_splice_read(in, ppos, pipe, len, flags); - -bail: -	return ret; -} -  static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,  				   struct iov_iter *to)  { @@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {  #endif  	.lock		= ocfs2_lock,  	.flock		= ocfs2_flock, -	.splice_read	= ocfs2_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.fallocate	= ocfs2_fallocate,  }; @@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {  	.compat_ioctl   = ocfs2_compat_ioctl,  #endif  	.flock		= ocfs2_flock, -	.splice_read	= ocfs2_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.fallocate	= ocfs2_fallocate,  }; diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index f8f5fc5e6c05..0b58abcf1c6d 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1314,8 +1314,6 @@ 
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);  DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); -DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); -  DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);  DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); diff --git a/fs/pipe.c b/fs/pipe.c index 4ebe6b2e5217..4fc422f0dea8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)  		if (bufs) {  			int curbuf = pipe->curbuf;  			struct pipe_buffer *buf = pipe->bufs + curbuf; -			const struct pipe_buf_operations *ops = buf->ops;  			size_t chars = buf->len;  			size_t written;  			int error; @@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)  			if (chars > total_len)  				chars = total_len; -			error = ops->confirm(pipe, buf); +			error = pipe_buf_confirm(pipe, buf);  			if (error) {  				if (!ret)  					ret = error; @@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)  			}  			if (!buf->len) { -				buf->ops = NULL; -				ops->release(pipe, buf); +				pipe_buf_release(pipe, buf);  				curbuf = (curbuf + 1) & (pipe->buffers - 1);  				pipe->curbuf = curbuf;  				pipe->nrbufs = --bufs; @@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)  		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &  							(pipe->buffers - 1);  		struct pipe_buffer *buf = pipe->bufs + lastbuf; -		const struct pipe_buf_operations *ops = buf->ops;  		int offset = buf->offset + buf->len; -		if (ops->can_merge && offset + chars <= PAGE_SIZE) { -			ret = ops->confirm(pipe, buf); +		if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { +			ret = pipe_buf_confirm(pipe, buf);  			if (ret)  				goto out; @@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)  	for (i = 0; i < pipe->buffers; i++) {  		struct pipe_buffer *buf = pipe->bufs + i;  		if (buf->ops) -			buf->ops->release(pipe, buf); +			pipe_buf_release(pipe, buf);  	}  	if (pipe->tmp_page)  		__free_page(pipe->tmp_page); diff --git a/fs/splice.c b/fs/splice.c index dd9bf7e410d2..aa38901a4f10 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,  		       struct splice_pipe_desc *spd)  {  	unsigned int spd_pages = spd->nr_pages; -	int ret, do_wakeup, page_nr; +	int ret = 0, page_nr = 0;  	if (!spd_pages)  		return 0; -	ret = 0; -	do_wakeup = 0; -	page_nr = 0; - -	pipe_lock(pipe); - -	for (;;) { -		if (!pipe->readers) { -			send_sig(SIGPIPE, current, 0); -			if (!ret) -				ret = -EPIPE; -			break; -		} - -		if (pipe->nrbufs < pipe->buffers) { -			int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); -			struct pipe_buffer *buf = pipe->bufs + newbuf; - -			buf->page = spd->pages[page_nr]; -			buf->offset = spd->partial[page_nr].offset; -			buf->len = spd->partial[page_nr].len; -			buf->private = spd->partial[page_nr].private; -			buf->ops = spd->ops; -			if (spd->flags & SPLICE_F_GIFT) -				buf->flags |= PIPE_BUF_FLAG_GIFT; - -			pipe->nrbufs++; -			page_nr++; -			ret += buf->len; - -			if (pipe->files) -				do_wakeup = 1; +	if (unlikely(!pipe->readers)) { +		send_sig(SIGPIPE, current, 0); +		ret = -EPIPE; +		goto out; +	} -			if (!--spd->nr_pages) -				break; -			if (pipe->nrbufs < pipe->buffers) -				continue; +	while (pipe->nrbufs < pipe->buffers) { +		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); +		struct pipe_buffer *buf = pipe->bufs + newbuf; -			break; -		} +		buf->page = spd->pages[page_nr]; +		buf->offset = spd->partial[page_nr].offset; +		buf->len 
= spd->partial[page_nr].len; +		buf->private = spd->partial[page_nr].private; +		buf->ops = spd->ops; -		if (spd->flags & SPLICE_F_NONBLOCK) { -			if (!ret) -				ret = -EAGAIN; -			break; -		} +		pipe->nrbufs++; +		page_nr++; +		ret += buf->len; -		if (signal_pending(current)) { -			if (!ret) -				ret = -ERESTARTSYS; +		if (!--spd->nr_pages)  			break; -		} - -		if (do_wakeup) { -			smp_mb(); -			if (waitqueue_active(&pipe->wait)) -				wake_up_interruptible_sync(&pipe->wait); -			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); -			do_wakeup = 0; -		} - -		pipe->waiting_writers++; -		pipe_wait(pipe); -		pipe->waiting_writers--;  	} -	pipe_unlock(pipe); - -	if (do_wakeup) -		wakeup_pipe_readers(pipe); +	if (!ret) +		ret = -EAGAIN; +out:  	while (page_nr < spd_pages)  		spd->spd_release(spd, page_nr++); @@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,  }  EXPORT_SYMBOL_GPL(splice_to_pipe); +ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) +{ +	int ret; + +	if (unlikely(!pipe->readers)) { +		send_sig(SIGPIPE, current, 0); +		ret = -EPIPE; +	} else if (pipe->nrbufs == pipe->buffers) { +		ret = -EAGAIN; +	} else { +		int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); +		pipe->bufs[newbuf] = *buf; +		pipe->nrbufs++; +		return buf->len; +	} +	pipe_buf_release(pipe, buf); +	return ret; +} +EXPORT_SYMBOL(add_to_pipe); +  void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)  {  	put_page(spd->pages[i]); @@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)  	kfree(spd->partial);  } -static int -__generic_file_splice_read(struct file *in, loff_t *ppos, -			   struct pipe_inode_info *pipe, size_t len, -			   unsigned int flags) -{ -	struct address_space *mapping = in->f_mapping; -	unsigned int loff, nr_pages, req_pages; -	struct page *pages[PIPE_DEF_BUFFERS]; -	struct partial_page partial[PIPE_DEF_BUFFERS]; -	struct page *page; -	pgoff_t index, end_index; -	loff_t isize; -	int error, page_nr; -	struct splice_pipe_desc spd = { -		.pages = pages, -		.partial = partial, -		.nr_pages_max = PIPE_DEF_BUFFERS, -		.flags = flags, -		.ops = &page_cache_pipe_buf_ops, -		.spd_release = spd_release_page, -	}; - -	if (splice_grow_spd(pipe, &spd)) -		return -ENOMEM; - -	index = *ppos >> PAGE_SHIFT; -	loff = *ppos & ~PAGE_MASK; -	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; -	nr_pages = min(req_pages, spd.nr_pages_max); - -	/* -	 * Lookup the (hopefully) full range of pages we need. -	 */ -	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); -	index += spd.nr_pages; - -	/* -	 * If find_get_pages_contig() returned fewer pages than we needed, -	 * readahead/allocate the rest and fill in the holes. -	 */ -	if (spd.nr_pages < nr_pages) -		page_cache_sync_readahead(mapping, &in->f_ra, in, -				index, req_pages - spd.nr_pages); - -	error = 0; -	while (spd.nr_pages < nr_pages) { -		/* -		 * Page could be there, find_get_pages_contig() breaks on -		 * the first hole. -		 */ -		page = find_get_page(mapping, index); -		if (!page) { -			/* -			 * page didn't exist, allocate one. -			 */ -			page = page_cache_alloc_cold(mapping); -			if (!page) -				break; - -			error = add_to_page_cache_lru(page, mapping, index, -				   mapping_gfp_constraint(mapping, GFP_KERNEL)); -			if (unlikely(error)) { -				put_page(page); -				if (error == -EEXIST) -					continue; -				break; -			} -			/* -			 * add_to_page_cache() locks the page, unlock it -			 * to avoid convoluting the logic below even more. 
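add_to_pipe(), added above, is deliberately minimal compared with the old splice_to_pipe() loop: it never blocks, assumes the caller serializes access to the pipe (the do_splice() and vmsplice paths below hold pipe_lock() around it), and queues exactly one prepared pipe_buffer, returning buf->len on success or -EPIPE/-EAGAIN after releasing the buffer on failure. Waiting for room and waking readers stay with the caller, as the fuse_dev_splice_read() and iter_to_pipe() hunks elsewhere in this diff show. A hedged sketch of that calling pattern (queue_one_page() and its arguments are illustrative):

/*
 * Queue one already-filled page into a pipe; the page reference is
 * donated to the pipe on success and dropped by add_to_pipe() on
 * failure. The caller wakes readers afterwards if this returned > 0.
 */
static ssize_t queue_one_page(struct pipe_inode_info *pipe,
			      struct page *page,
			      unsigned int offset, unsigned int len)
{
	struct pipe_buffer buf = {
		.page	= page,
		.offset	= offset,
		.len	= len,
		.ops	= &nosteal_pipe_buf_ops,
	};
	ssize_t ret;

	pipe_lock(pipe);
	ret = add_to_pipe(pipe, &buf);	/* > 0: bytes queued; < 0: buf released */
	pipe_unlock(pipe);
	return ret;
}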
-			 */ -			unlock_page(page); -		} - -		spd.pages[spd.nr_pages++] = page; -		index++; -	} - -	/* -	 * Now loop over the map and see if we need to start IO on any -	 * pages, fill in the partial map, etc. -	 */ -	index = *ppos >> PAGE_SHIFT; -	nr_pages = spd.nr_pages; -	spd.nr_pages = 0; -	for (page_nr = 0; page_nr < nr_pages; page_nr++) { -		unsigned int this_len; - -		if (!len) -			break; - -		/* -		 * this_len is the max we'll use from this page -		 */ -		this_len = min_t(unsigned long, len, PAGE_SIZE - loff); -		page = spd.pages[page_nr]; - -		if (PageReadahead(page)) -			page_cache_async_readahead(mapping, &in->f_ra, in, -					page, index, req_pages - page_nr); - -		/* -		 * If the page isn't uptodate, we may need to start io on it -		 */ -		if (!PageUptodate(page)) { -			lock_page(page); - -			/* -			 * Page was truncated, or invalidated by the -			 * filesystem.  Redo the find/create, but this time the -			 * page is kept locked, so there's no chance of another -			 * race with truncate/invalidate. -			 */ -			if (!page->mapping) { -				unlock_page(page); -retry_lookup: -				page = find_or_create_page(mapping, index, -						mapping_gfp_mask(mapping)); - -				if (!page) { -					error = -ENOMEM; -					break; -				} -				put_page(spd.pages[page_nr]); -				spd.pages[page_nr] = page; -			} -			/* -			 * page was already under io and is now done, great -			 */ -			if (PageUptodate(page)) { -				unlock_page(page); -				goto fill_it; -			} - -			/* -			 * need to read in the page -			 */ -			error = mapping->a_ops->readpage(in, page); -			if (unlikely(error)) { -				/* -				 * Re-lookup the page -				 */ -				if (error == AOP_TRUNCATED_PAGE) -					goto retry_lookup; - -				break; -			} -		} -fill_it: -		/* -		 * i_size must be checked after PageUptodate. -		 */ -		isize = i_size_read(mapping->host); -		end_index = (isize - 1) >> PAGE_SHIFT; -		if (unlikely(!isize || index > end_index)) -			break; - -		/* -		 * if this is the last page, see if we need to shrink -		 * the length and stop -		 */ -		if (end_index == index) { -			unsigned int plen; - -			/* -			 * max good bytes in this page -			 */ -			plen = ((isize - 1) & ~PAGE_MASK) + 1; -			if (plen <= loff) -				break; - -			/* -			 * force quit after adding this page -			 */ -			this_len = min(this_len, plen - loff); -			len = this_len; -		} - -		spd.partial[page_nr].offset = loff; -		spd.partial[page_nr].len = this_len; -		len -= this_len; -		loff = 0; -		spd.nr_pages++; -		index++; -	} - -	/* -	 * Release any pages at the end, if we quit early. 'page_nr' is how far -	 * we got, 'nr_pages' is how many pages are in the map. -	 */ -	while (page_nr < nr_pages) -		put_page(spd.pages[page_nr++]); -	in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; - -	if (spd.nr_pages) -		error = splice_to_pipe(pipe, &spd); - -	splice_shrink_spd(&spd); -	return error; -} -  /**   * generic_file_splice_read - splice data from file to a pipe   * @in:		file to splice from @@ -514,39 +290,53 @@ fill_it:   *   * Description:   *    Will read pages from given file and fill them into a pipe. Can be - *    used as long as the address_space operations for the source implements - *    a readpage() hook. + *    used as long as it has more or less sane ->read_iter().   
*   */  ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,  				 struct pipe_inode_info *pipe, size_t len,  				 unsigned int flags)  { -	loff_t isize, left; -	int ret; - -	if (IS_DAX(in->f_mapping->host)) -		return default_file_splice_read(in, ppos, pipe, len, flags); +	struct iov_iter to; +	struct kiocb kiocb; +	loff_t isize; +	int idx, ret;  	isize = i_size_read(in->f_mapping->host);  	if (unlikely(*ppos >= isize))  		return 0; -	left = isize - *ppos; -	if (unlikely(left < len)) -		len = left; - -	ret = __generic_file_splice_read(in, ppos, pipe, len, flags); +	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); +	idx = to.idx; +	init_sync_kiocb(&kiocb, in); +	kiocb.ki_pos = *ppos; +	ret = in->f_op->read_iter(&kiocb, &to);  	if (ret > 0) { -		*ppos += ret; +		*ppos = kiocb.ki_pos;  		file_accessed(in); +	} else if (ret < 0) { +		if (WARN_ON(to.idx != idx || to.iov_offset)) { +			/* +			 * a bogus ->read_iter() has copied something and still +			 * returned an error instead of a short read. +			 */ +			to.idx = idx; +			to.iov_offset = 0; +			iov_iter_advance(&to, 0); /* to free what was emitted */ +		} +		/* +		 * callers of ->splice_read() expect -EAGAIN on +		 * "can't put anything in there", rather than -EFAULT. +		 */ +		if (ret == -EFAULT) +			ret = -EAGAIN;  	}  	return ret;  }  EXPORT_SYMBOL(generic_file_splice_read); -static const struct pipe_buf_operations default_pipe_buf_ops = { +const struct pipe_buf_operations default_pipe_buf_ops = {  	.can_merge = 0,  	.confirm = generic_pipe_buf_confirm,  	.release = generic_pipe_buf_release, @@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {  };  EXPORT_SYMBOL(nosteal_pipe_buf_ops); -static ssize_t kernel_readv(struct file *file, const struct iovec *vec, +static ssize_t kernel_readv(struct file *file, const struct kvec *vec,  			    unsigned long vlen, loff_t offset)  {  	mm_segment_t old_fs; @@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,  }  EXPORT_SYMBOL(kernel_write); -ssize_t default_file_splice_read(struct file *in, loff_t *ppos, +static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  				 struct pipe_inode_info *pipe, size_t len,  				 unsigned int flags)  { +	struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; +	struct iov_iter to; +	struct page **pages;  	unsigned int nr_pages; -	unsigned int nr_freed; -	size_t offset; -	struct page *pages[PIPE_DEF_BUFFERS]; -	struct partial_page partial[PIPE_DEF_BUFFERS]; -	struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; +	size_t offset, dummy, copied = 0;  	ssize_t res; -	size_t this_len; -	int error;  	int i; -	struct splice_pipe_desc spd = { -		.pages = pages, -		.partial = partial, -		.nr_pages_max = PIPE_DEF_BUFFERS, -		.flags = flags, -		.ops = &default_pipe_buf_ops, -		.spd_release = spd_release_page, -	}; -	if (splice_grow_spd(pipe, &spd)) +	if (pipe->nrbufs == pipe->buffers) +		return -EAGAIN; + +	/* +	 * Try to keep page boundaries matching to source pagecache ones - +	 * it probably won't be much help, but... 
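Since generic_file_splice_read() above is now just the file's ->read_iter feeding an ITER_PIPE iterator, the user-visible behaviour of splice(2) from a regular file should be unchanged. A small stand-alone userspace check of that path (illustrative, not part of the patch):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char out[65536];
	int fd, pfd[2];
	ssize_t n, m;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}
	/* file -> pipe: lands in ->splice_read, i.e. generic_file_splice_read() */
	n = splice(fd, NULL, pfd[1], NULL, sizeof(out), 0);
	if (n < 0) {
		perror("splice");
		return 1;
	}
	/* drain the pipe normally so the result is visible */
	while (n > 0 && (m = read(pfd[0], out, sizeof(out))) > 0) {
		write(STDOUT_FILENO, out, m);
		n -= m;
	}
	return 0;
}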
+	 */ +	offset = *ppos & ~PAGE_MASK; + +	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); + +	res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy); +	if (res <= 0)  		return -ENOMEM; -	res = -ENOMEM; +	nr_pages = res / PAGE_SIZE; +  	vec = __vec; -	if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { -		vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); -		if (!vec) -			goto shrink_ret; +	if (nr_pages > PIPE_DEF_BUFFERS) { +		vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL); +		if (unlikely(!vec)) { +			res = -ENOMEM; +			goto out; +		}  	} -	offset = *ppos & ~PAGE_MASK; -	nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - -	for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { -		struct page *page; +	pipe->bufs[to.idx].offset = offset; +	pipe->bufs[to.idx].len -= offset; -		page = alloc_page(GFP_USER); -		error = -ENOMEM; -		if (!page) -			goto err; - -		this_len = min_t(size_t, len, PAGE_SIZE - offset); -		vec[i].iov_base = (void __user *) page_address(page); +	for (i = 0; i < nr_pages; i++) { +		size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); +		vec[i].iov_base = page_address(pages[i]) + offset;  		vec[i].iov_len = this_len; -		spd.pages[i] = page; -		spd.nr_pages++;  		len -= this_len;  		offset = 0;  	} -	res = kernel_readv(in, vec, spd.nr_pages, *ppos); -	if (res < 0) { -		error = res; -		goto err; -	} - -	error = 0; -	if (!res) -		goto err; - -	nr_freed = 0; -	for (i = 0; i < spd.nr_pages; i++) { -		this_len = min_t(size_t, vec[i].iov_len, res); -		spd.partial[i].offset = 0; -		spd.partial[i].len = this_len; -		if (!this_len) { -			__free_page(spd.pages[i]); -			spd.pages[i] = NULL; -			nr_freed++; -		} -		res -= this_len; -	} -	spd.nr_pages -= nr_freed; - -	res = splice_to_pipe(pipe, &spd); -	if (res > 0) +	res = kernel_readv(in, vec, nr_pages, *ppos); +	if (res > 0) { +		copied = res;  		*ppos += res; +	} -shrink_ret:  	if (vec != __vec)  		kfree(vec); -	splice_shrink_spd(&spd); +out: +	for (i = 0; i < nr_pages; i++) +		put_page(pages[i]); +	kvfree(pages); +	iov_iter_advance(&to, copied);	/* truncates and discards */  	return res; - -err: -	for (i = 0; i < spd.nr_pages; i++) -		__free_page(spd.pages[i]); - -	res = error; -	goto shrink_ret;  } -EXPORT_SYMBOL(default_file_splice_read);  /*   * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' @@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des  	while (pipe->nrbufs) {  		struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; -		const struct pipe_buf_operations *ops = buf->ops;  		sd->len = buf->len;  		if (sd->len > sd->total_len)  			sd->len = sd->total_len; -		ret = buf->ops->confirm(pipe, buf); +		ret = pipe_buf_confirm(pipe, buf);  		if (unlikely(ret)) {  			if (ret == -ENODATA)  				ret = 0; @@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des  		sd->total_len -= ret;  		if (!buf->len) { -			buf->ops = NULL; -			ops->release(pipe, buf); +			pipe_buf_release(pipe, buf);  			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);  			pipe->nrbufs--;  			if (pipe->files) @@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,  			if (idx == pipe->buffers - 1)  				idx = -1; -			ret = buf->ops->confirm(pipe, buf); +			ret = pipe_buf_confirm(pipe, buf);  			if (unlikely(ret)) {  				if (ret == -ENODATA)  					ret = 0; @@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,  		while 
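The rewritten default_file_splice_read() above no longer builds a splice_pipe_desc at all: iov_iter_get_pages_alloc() on the ITER_PIPE iterator allocates the pipe's pages, those pages are wrapped in kvecs and handed to kernel_readv(), and iov_iter_advance() afterwards keeps only what was actually read. A sketch of just the kvec-building step, with explanatory comments (locals as set up in the function above):

	for (i = 0; i < nr_pages; i++) {
		/* cap at the page boundary; only the first page can start mid-page */
		size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);

		/*
		 * kvec carries kernel addresses, hence page_address() rather
		 * than a user pointer - which is why kernel_readv() switched
		 * from struct iovec to struct kvec in this patch.
		 */
		vec[i].iov_base = page_address(pages[i]) + offset;
		vec[i].iov_len	= this_len;
		len -= this_len;
		offset = 0;
	}
	res = kernel_readv(in, vec, nr_pages, *ppos);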
(ret) {  			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;  			if (ret >= buf->len) { -				const struct pipe_buf_operations *ops = buf->ops;  				ret -= buf->len;  				buf->len = 0; -				buf->ops = NULL; -				ops->release(pipe, buf); +				pipe_buf_release(pipe, buf);  				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);  				pipe->nrbufs--;  				if (pipe->files) @@ -1273,10 +1027,8 @@ out_release:  	for (i = 0; i < pipe->buffers; i++) {  		struct pipe_buffer *buf = pipe->bufs + i; -		if (buf->ops) { -			buf->ops->release(pipe, buf); -			buf->ops = NULL; -		} +		if (buf->ops) +			pipe_buf_release(pipe, buf);  	}  	if (!bytes) @@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,  }  EXPORT_SYMBOL(do_splice_direct); +static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) +{ +	while (pipe->nrbufs == pipe->buffers) { +		if (flags & SPLICE_F_NONBLOCK) +			return -EAGAIN; +		if (signal_pending(current)) +			return -ERESTARTSYS; +		pipe->waiting_writers++; +		pipe_wait(pipe); +		pipe->waiting_writers--; +	} +	return 0; +} +  static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,  			       struct pipe_inode_info *opipe,  			       size_t len, unsigned int flags); @@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,  			offset = in->f_pos;  		} -		ret = do_splice_to(in, &offset, opipe, len, flags); - +		pipe_lock(opipe); +		ret = wait_for_space(opipe, flags); +		if (!ret) +			ret = do_splice_to(in, &offset, opipe, len, flags); +		pipe_unlock(opipe); +		if (ret > 0) +			wakeup_pipe_readers(opipe);  		if (!off_in)  			in->f_pos = offset;  		else if (copy_to_user(off_in, &offset, sizeof(loff_t))) @@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,  	return -EINVAL;  } -/* - * Map an iov into an array of pages and offset/length tupples. With the - * partial_page structure, we can map several non-contiguous ranges into - * our ones pages[] map instead of splitting that operation into pieces. - * Could easily be exported as a generic helper for other users, in which - * case one would probably want to add a 'max_nr_pages' parameter as well. - */ -static int get_iovec_page_array(const struct iovec __user *iov, -				unsigned int nr_vecs, struct page **pages, -				struct partial_page *partial, bool aligned, -				unsigned int pipe_buffers) +static int iter_to_pipe(struct iov_iter *from, +			struct pipe_inode_info *pipe, +			unsigned flags)  { -	int buffers = 0, error = 0; - -	while (nr_vecs) { -		unsigned long off, npages; -		struct iovec entry; -		void __user *base; -		size_t len; -		int i; - -		error = -EFAULT; -		if (copy_from_user(&entry, iov, sizeof(entry))) -			break; - -		base = entry.iov_base; -		len = entry.iov_len; - -		/* -		 * Sanity check this iovec. 0 read succeeds. -		 */ -		error = 0; -		if (unlikely(!len)) -			break; -		error = -EFAULT; -		if (!access_ok(VERIFY_READ, base, len)) -			break; - -		/* -		 * Get this base offset and number of pages, then map -		 * in the user pages. -		 */ -		off = (unsigned long) base & ~PAGE_MASK; - -		/* -		 * If asked for alignment, the offset must be zero and the -		 * length a multiple of the PAGE_SIZE. 
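wait_for_space() above, together with the reworked do_splice(), spells out the producer-side protocol for filling a pipe: take pipe_lock(), wait for room (honouring SPLICE_F_NONBLOCK and signals), fill, unlock, and only then wake readers. vmsplice_to_pipe() below follows the same shape. A distilled sketch of that protocol, notionally inside fs/splice.c where wait_for_space() and wakeup_pipe_readers() are file-local (fill() is a stand-in for do_splice_to() or iter_to_pipe(), not a real function):

static long produce_into_pipe(struct pipe_inode_info *pipe, unsigned int flags,
			      long (*fill)(struct pipe_inode_info *pipe, void *arg),
			      void *arg)
{
	long ret;

	pipe_lock(pipe);
	ret = wait_for_space(pipe, flags);	/* 0, -EAGAIN or -ERESTARTSYS */
	if (!ret)
		ret = fill(pipe, arg);
	pipe_unlock(pipe);
	if (ret > 0)
		wakeup_pipe_readers(pipe);	/* wake readers outside the lock */
	return ret;
}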
-		 */ -		error = -EINVAL; -		if (aligned && (off || len & ~PAGE_MASK)) -			break; - -		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; -		if (npages > pipe_buffers - buffers) -			npages = pipe_buffers - buffers; - -		error = get_user_pages_fast((unsigned long)base, npages, -					0, &pages[buffers]); - -		if (unlikely(error <= 0)) +	struct pipe_buffer buf = { +		.ops = &user_page_pipe_buf_ops, +		.flags = flags +	}; +	size_t total = 0; +	int ret = 0; +	bool failed = false; + +	while (iov_iter_count(from) && !failed) { +		struct page *pages[16]; +		ssize_t copied; +		size_t start; +		int n; + +		copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); +		if (copied <= 0) { +			ret = copied;  			break; - -		/* -		 * Fill this contiguous range into the partial page map. -		 */ -		for (i = 0; i < error; i++) { -			const int plen = min_t(size_t, len, PAGE_SIZE - off); - -			partial[buffers].offset = off; -			partial[buffers].len = plen; - -			off = 0; -			len -= plen; -			buffers++;  		} -		/* -		 * We didn't complete this iov, stop here since it probably -		 * means we have to move some of this into a pipe to -		 * be able to continue. -		 */ -		if (len) -			break; - -		/* -		 * Don't continue if we mapped fewer pages than we asked for, -		 * or if we mapped the max number of pages that we have -		 * room for. -		 */ -		if (error < npages || buffers == pipe_buffers) -			break; - -		nr_vecs--; -		iov++; +		for (n = 0; copied; n++, start = 0) { +			int size = min_t(int, copied, PAGE_SIZE - start); +			if (!failed) { +				buf.page = pages[n]; +				buf.offset = start; +				buf.len = size; +				ret = add_to_pipe(pipe, &buf); +				if (unlikely(ret < 0)) { +					failed = true; +				} else { +					iov_iter_advance(from, ret); +					total += ret; +				} +			} else { +				put_page(pages[n]); +			} +			copied -= size; +		}  	} - -	if (buffers) -		return buffers; - -	return error; +	return total ? total : ret;  }  static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, @@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,   * as splice-from-memory, where the regular splice is splice-from-file (or   * to file). In both cases the output is a pipe, naturally.   
*/ -static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, +static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,  			     unsigned long nr_segs, unsigned int flags)  {  	struct pipe_inode_info *pipe; -	struct page *pages[PIPE_DEF_BUFFERS]; -	struct partial_page partial[PIPE_DEF_BUFFERS]; -	struct splice_pipe_desc spd = { -		.pages = pages, -		.partial = partial, -		.nr_pages_max = PIPE_DEF_BUFFERS, -		.flags = flags, -		.ops = &user_page_pipe_buf_ops, -		.spd_release = spd_release_page, -	}; +	struct iovec iovstack[UIO_FASTIOV]; +	struct iovec *iov = iovstack; +	struct iov_iter from;  	long ret; +	unsigned buf_flag = 0; + +	if (flags & SPLICE_F_GIFT) +		buf_flag = PIPE_BUF_FLAG_GIFT;  	pipe = get_pipe_info(file);  	if (!pipe)  		return -EBADF; -	if (splice_grow_spd(pipe, &spd)) -		return -ENOMEM; - -	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, -					    spd.partial, false, -					    spd.nr_pages_max); -	if (spd.nr_pages <= 0) -		ret = spd.nr_pages; -	else -		ret = splice_to_pipe(pipe, &spd); +	ret = import_iovec(WRITE, uiov, nr_segs, +			   ARRAY_SIZE(iovstack), &iov, &from); +	if (ret < 0) +		return ret; -	splice_shrink_spd(&spd); +	pipe_lock(pipe); +	ret = wait_for_space(pipe, flags); +	if (!ret) +		ret = iter_to_pipe(&from, pipe, buf_flag); +	pipe_unlock(pipe); +	if (ret > 0) +		wakeup_pipe_readers(pipe); +	kfree(iov);  	return ret;  } @@ -1876,7 +1589,7 @@ retry:  			 * Get a reference to this pipe buffer,  			 * so we can copy the contents over.  			 */ -			ibuf->ops->get(ipipe, ibuf); +			pipe_buf_get(ipipe, ibuf);  			*obuf = *ibuf;  			/* @@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,  		 * Get a reference to this pipe buffer,  		 * so we can copy the contents over.  		 */ -		ibuf->ops->get(ipipe, ibuf); +		pipe_buf_get(ipipe, ibuf);  		obuf = opipe->bufs + nbuf;  		*obuf = *ibuf; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c68517b0f248..f46b2929c64d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -393,45 +393,6 @@ xfs_file_read_iter(  	return ret;  } -STATIC ssize_t -xfs_file_splice_read( -	struct file		*infilp, -	loff_t			*ppos, -	struct pipe_inode_info	*pipe, -	size_t			count, -	unsigned int		flags) -{ -	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host); -	ssize_t			ret; - -	XFS_STATS_INC(ip->i_mount, xs_read_calls); - -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) -		return -EIO; - -	trace_xfs_file_splice_read(ip, count, *ppos); - -	/* -	 * DAX inodes cannot ues the page cache for splice, so we have to push -	 * them through the VFS IO path. This means it goes through -	 * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we -	 * cannot lock the splice operation at this level for DAX inodes. -	 */ -	if (IS_DAX(VFS_I(ip))) { -		ret = default_file_splice_read(infilp, ppos, pipe, count, -					       flags); -		goto out; -	} - -	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); -	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); -	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); -out: -	if (ret > 0) -		XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); -	return ret; -} -  /*   * Zero any on disk space between the current EOF and the new, larger EOF.   
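The rewritten vmsplice_to_pipe() above goes through import_iovec(), wait_for_space() and iter_to_pipe(), so user pages are pinned with iov_iter_get_pages() and appended via add_to_pipe() instead of the removed get_iovec_page_array(). A tiny userspace exercise of that path (illustrative, not part of the patch):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	static char data[] = "hello from vmsplice\n";
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) - 1 };
	char out[64];
	int pfd[2];
	ssize_t n;

	if (pipe(pfd) < 0)
		return 1;
	/* pins the user page and queues it as a user_page_pipe_buf_ops buffer */
	n = vmsplice(pfd[1], &iov, 1, 0);
	if (n < 0) {
		perror("vmsplice");
		return 1;
	}
	n = read(pfd[0], out, sizeof(out));
	if (n > 0)
		write(STDOUT_FILENO, out, n);
	return 0;
}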
* @@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {  	.llseek		= xfs_file_llseek,  	.read_iter	= xfs_file_read_iter,  	.write_iter	= xfs_file_write_iter, -	.splice_read	= xfs_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.unlocked_ioctl	= xfs_file_ioctl,  #ifdef CONFIG_COMPAT diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c6b2b1dcde75..16093c7dacde 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);  DEFINE_RW_EVENT(xfs_file_buffered_write);  DEFINE_RW_EVENT(xfs_file_direct_write);  DEFINE_RW_EVENT(xfs_file_dax_write); -DEFINE_RW_EVENT(xfs_file_splice_read);  DECLARE_EVENT_CLASS(xfs_page_class,  	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..b04883e74579 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page);  /* fs/splice.c */  extern ssize_t generic_file_splice_read(struct file *, loff_t *,  		struct pipe_inode_info *, size_t, unsigned int); -extern ssize_t default_file_splice_read(struct file *, loff_t *, -		struct pipe_inode_info *, size_t, unsigned int);  extern ssize_t iter_file_splice_write(struct pipe_inode_info *,  		struct file *, loff_t *, size_t, unsigned int);  extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 24f5470d3944..e7497c9dde7f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -66,15 +66,10 @@ struct pipe_inode_info {   *   * ->confirm()   *	->steal() - *	... - *	->map() - *	... - *	->unmap()   * - * That is, ->map() must be called on a confirmed buffer, - * same goes for ->steal(). See below for the meaning of each - * operation. Also see kerneldoc in fs/pipe.c for the pipe - * and generic variants of these hooks. + * That is, ->steal() must be called on a confirmed buffer. + * See below for the meaning of each operation. Also see kerneldoc + * in fs/pipe.c for the pipe and generic variants of these hooks.   
*/  struct pipe_buf_operations {  	/* @@ -115,6 +110,53 @@ struct pipe_buf_operations {  	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);  }; +/** + * pipe_buf_get - get a reference to a pipe_buffer + * @pipe:	the pipe that the buffer belongs to + * @buf:	the buffer to get a reference to + */ +static inline void pipe_buf_get(struct pipe_inode_info *pipe, +				struct pipe_buffer *buf) +{ +	buf->ops->get(pipe, buf); +} + +/** + * pipe_buf_release - put a reference to a pipe_buffer + * @pipe:	the pipe that the buffer belongs to + * @buf:	the buffer to put a reference to + */ +static inline void pipe_buf_release(struct pipe_inode_info *pipe, +				    struct pipe_buffer *buf) +{ +	const struct pipe_buf_operations *ops = buf->ops; + +	buf->ops = NULL; +	ops->release(pipe, buf); +} + +/** + * pipe_buf_confirm - verify contents of the pipe buffer + * @pipe:	the pipe that the buffer belongs to + * @buf:	the buffer to confirm + */ +static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, +				   struct pipe_buffer *buf) +{ +	return buf->ops->confirm(pipe, buf); +} + +/** + * pipe_buf_steal - attempt to take ownership of a pipe_buffer + * @pipe:	the pipe that the buffer belongs to + * @buf:	the buffer to attempt to steal + */ +static inline int pipe_buf_steal(struct pipe_inode_info *pipe, +				 struct pipe_buffer *buf) +{ +	return buf->ops->steal(pipe, buf); +} +  /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual     memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */  #define PIPE_SIZE		PAGE_SIZE @@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard;  extern unsigned long pipe_user_pages_soft;  int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); -  /* Drop the inode semaphore and wait for a pipe event, atomically */  void pipe_wait(struct pipe_inode_info *pipe); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9bf60b556bd2..601258f6e621 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);  int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);  __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,  			      int len, __wsum csum); -ssize_t skb_socket_splice(struct sock *sk, -			  struct pipe_inode_info *pipe, -			  struct splice_pipe_desc *spd);  int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,  		    struct pipe_inode_info *pipe, unsigned int len, -		    unsigned int flags, -		    ssize_t (*splice_cb)(struct sock *, -					 struct pipe_inode_info *, -					 struct splice_pipe_desc *)); +		    unsigned int flags);  void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);  unsigned int skb_zerocopy_headlen(const struct sk_buff *from);  int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, diff --git a/include/linux/splice.h b/include/linux/splice.h index da2751d3b93d..00a21166e268 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *,  				  struct splice_desc *, splice_actor *);  extern ssize_t splice_to_pipe(struct pipe_inode_info *,  			      struct splice_pipe_desc *); +extern ssize_t add_to_pipe(struct pipe_inode_info *, +			      struct pipe_buffer *);  extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,  				      splice_direct_actor *); @@ -83,4 +85,5 @@ extern void 
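The four static inlines above are what the per-site conversions earlier in this diff (fs/pipe.c, fs/fuse/dev.c, fs/splice.c, drivers/char/virtio_console.c) now call; pipe_buf_release() in particular folds the old "buf->ops = NULL; ops->release(...)" two-step into one place. A sketch of a consumer using them, mirroring the pipe_read()/splice_from_pipe_feed() hunks (illustrative only, pipe lock assumed held):

/* Consume the buffer at pipe->curbuf; caller holds the pipe lock. */
static int consume_one_buf(struct pipe_inode_info *pipe)
{
	struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
	int err;

	err = pipe_buf_confirm(pipe, buf);	/* make sure the data is there */
	if (err)
		return err;

	/* ... use buf->len bytes at buf->page + buf->offset ... */

	pipe_buf_release(pipe, buf);		/* clears buf->ops, drops the page */
	pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
	pipe->nrbufs--;
	return 0;
}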
splice_shrink_spd(struct splice_pipe_desc *);  extern void spd_release_page(struct splice_pipe_desc *, unsigned int);  extern const struct pipe_buf_operations page_cache_pipe_buf_ops; +extern const struct pipe_buf_operations default_pipe_buf_ops;  #endif diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..b5ebe6dca404 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -13,6 +13,7 @@  #include <uapi/linux/uio.h>  struct page; +struct pipe_inode_info;  struct kvec {  	void *iov_base; /* and that should *never* hold a userland pointer */ @@ -23,6 +24,7 @@ enum {  	ITER_IOVEC = 0,  	ITER_KVEC = 2,  	ITER_BVEC = 4, +	ITER_PIPE = 8,  };  struct iov_iter { @@ -33,8 +35,12 @@ struct iov_iter {  		const struct iovec *iov;  		const struct kvec *kvec;  		const struct bio_vec *bvec; +		struct pipe_inode_info *pipe; +	}; +	union { +		unsigned long nr_segs; +		int idx;  	}; -	unsigned long nr_segs;  };  /* @@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)  }  #define iov_for_each(iov, iter, start)				\ -	if (!((start).type & ITER_BVEC))			\ +	if (!((start).type & (ITER_BVEC | ITER_PIPE)))		\  	for (iter = (start);					\  	     (iter).count &&					\  	     ((iov = iov_iter_iovec(&(iter))), 1);		\ @@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,  			unsigned long nr_segs, size_t count);  void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,  			unsigned long nr_segs, size_t count); +void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, +			size_t count);  ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,  			size_t maxsize, unsigned maxpages, size_t *start);  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, @@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i)  static inline bool iter_is_iovec(struct iov_iter *i)  { -	return !(i->type & (ITER_BVEC | ITER_KVEC)); +	return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));  }  /* diff --git a/kernel/relay.c b/kernel/relay.c index fc9b4a4af463..9988f5cc2d46 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,  	return end_pos;  } -/* - *	subbuf_read_actor - read up to one subbuf's worth of data - */ -static int subbuf_read_actor(size_t read_start, -			     struct rchan_buf *buf, -			     size_t avail, -			     read_descriptor_t *desc) -{ -	void *from; -	int ret = 0; - -	from = buf->start + read_start; -	ret = avail; -	if (copy_to_user(desc->arg.buf, from, avail)) { -		desc->error = -EFAULT; -		ret = 0; -	} -	desc->arg.data += ret; -	desc->written += ret; -	desc->count -= ret; - -	return ret; -} - -typedef int (*subbuf_actor_t) (size_t read_start, -			       struct rchan_buf *buf, -			       size_t avail, -			       read_descriptor_t *desc); - -/* - *	relay_file_read_subbufs - read count bytes, bridging subbuf boundaries - */ -static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, -					subbuf_actor_t subbuf_actor, -					read_descriptor_t *desc) +static ssize_t relay_file_read(struct file *filp, +			       char __user *buffer, +			       size_t count, +			       loff_t *ppos)  {  	struct rchan_buf *buf = filp->private_data;  	size_t read_start, avail; +	size_t written = 0;  	int ret; -	if (!desc->count) +	if (!count)  		return 0;  	inode_lock(file_inode(filp));  	do { +		void *from; +  		if (!relay_file_read_avail(buf, 
*ppos))  			break; @@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,  		if (!avail)  			break; -		avail = min(desc->count, avail); -		ret = subbuf_actor(read_start, buf, avail, desc); -		if (desc->error < 0) +		avail = min(count, avail); +		from = buf->start + read_start; +		ret = avail; +		if (copy_to_user(buffer, from, avail))  			break; -		if (ret) { -			relay_file_read_consume(buf, read_start, ret); -			*ppos = relay_file_read_end_pos(buf, read_start, ret); -		} -	} while (desc->count && ret); -	inode_unlock(file_inode(filp)); +		buffer += ret; +		written += ret; +		count -= ret; -	return desc->written; -} +		relay_file_read_consume(buf, read_start, ret); +		*ppos = relay_file_read_end_pos(buf, read_start, ret); +	} while (count); +	inode_unlock(file_inode(filp)); -static ssize_t relay_file_read(struct file *filp, -			       char __user *buffer, -			       size_t count, -			       loff_t *ppos) -{ -	read_descriptor_t desc; -	desc.written = 0; -	desc.count = count; -	desc.arg.buf = buffer; -	desc.error = 0; -	return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); +	return written;  }  static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e3138cfc8c9..48b8c27acabb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -3,8 +3,11 @@  #include <linux/pagemap.h>  #include <linux/slab.h>  #include <linux/vmalloc.h> +#include <linux/splice.h>  #include <net/checksum.h> +#define PIPE_PARANOIA /* for now */ +  #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\  	size_t left;					\  	size_t wanted = n;				\ @@ -290,6 +293,93 @@ done:  	return wanted - bytes;  } +#ifdef PIPE_PARANOIA +static bool sanity(const struct iov_iter *i) +{ +	struct pipe_inode_info *pipe = i->pipe; +	int idx = i->idx; +	int next = pipe->curbuf + pipe->nrbufs; +	if (i->iov_offset) { +		struct pipe_buffer *p; +		if (unlikely(!pipe->nrbufs)) +			goto Bad;	// pipe must be non-empty +		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) +			goto Bad;	// must be at the last buffer... + +		p = &pipe->bufs[idx]; +		if (unlikely(p->offset + p->len != i->iov_offset)) +			goto Bad;	// ... 
at the end of segment +	} else { +		if (idx != (next & (pipe->buffers - 1))) +			goto Bad;	// must be right after the last buffer +	} +	return true; +Bad: +	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); +	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", +			pipe->curbuf, pipe->nrbufs, pipe->buffers); +	for (idx = 0; idx < pipe->buffers; idx++) +		printk(KERN_ERR "[%p %p %d %d]\n", +			pipe->bufs[idx].ops, +			pipe->bufs[idx].page, +			pipe->bufs[idx].offset, +			pipe->bufs[idx].len); +	WARN_ON(1); +	return false; +} +#else +#define sanity(i) true +#endif + +static inline int next_idx(int idx, struct pipe_inode_info *pipe) +{ +	return (idx + 1) & (pipe->buffers - 1); +} + +static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, +			 struct iov_iter *i) +{ +	struct pipe_inode_info *pipe = i->pipe; +	struct pipe_buffer *buf; +	size_t off; +	int idx; + +	if (unlikely(bytes > i->count)) +		bytes = i->count; + +	if (unlikely(!bytes)) +		return 0; + +	if (!sanity(i)) +		return 0; + +	off = i->iov_offset; +	idx = i->idx; +	buf = &pipe->bufs[idx]; +	if (off) { +		if (offset == off && buf->page == page) { +			/* merge with the last one */ +			buf->len += bytes; +			i->iov_offset += bytes; +			goto out; +		} +		idx = next_idx(idx, pipe); +		buf = &pipe->bufs[idx]; +	} +	if (idx == pipe->curbuf && pipe->nrbufs) +		return 0; +	pipe->nrbufs++; +	buf->ops = &page_cache_pipe_buf_ops; +	get_page(buf->page = page); +	buf->offset = offset; +	buf->len = bytes; +	i->iov_offset = offset + bytes; +	i->idx = idx; +out: +	i->count -= bytes; +	return bytes; +} +  /*   * Fault in one or more iovecs of the given iov_iter, to a maximum length of   * bytes.  For each iovec, fault in each page that constitutes the iovec. 
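/*
 * Illustrative sketch (not kernel code): a minimal userspace model of the
 * ring arithmetic that copy_page_to_iter_pipe() and next_idx() above rely on.
 * pipe->buffers is a power of two, so the slot after (curbuf + nrbufs) wraps
 * via "& (buffers - 1)", and the ring is full once the candidate slot wraps
 * back onto curbuf while nrbufs is non-zero.  The names demo_pipe and
 * demo_push are assumptions made for this sketch only.
 */
#include <stdio.h>

struct demo_pipe {
	unsigned int curbuf;	/* first occupied slot */
	unsigned int nrbufs;	/* number of occupied slots */
	unsigned int buffers;	/* ring size, power of two */
};

static int demo_push(struct demo_pipe *p)
{
	unsigned int idx = (p->curbuf + p->nrbufs) & (p->buffers - 1);

	if (idx == p->curbuf && p->nrbufs)	/* wrapped onto curbuf: full */
		return -1;
	p->nrbufs++;
	return idx;				/* slot the new buffer lands in */
}

int main(void)
{
	struct demo_pipe p = { .curbuf = 14, .nrbufs = 0, .buffers = 16 };
	int i, idx;

	for (i = 0; i < 20; i++) {
		idx = demo_push(&p);
		if (idx < 0) {
			printf("ring full after %d pushes\n", i);
			break;
		}
		printf("push %d -> slot %d\n", i, idx);
	}
	return 0;
}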
@@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)  	kunmap_atomic(addr);  } +static inline bool allocated(struct pipe_buffer *buf) +{ +	return buf->ops == &default_pipe_buf_ops; +} + +static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) +{ +	size_t off = i->iov_offset; +	int idx = i->idx; +	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { +		idx = next_idx(idx, i->pipe); +		off = 0; +	} +	*idxp = idx; +	*offp = off; +} + +static size_t push_pipe(struct iov_iter *i, size_t size, +			int *idxp, size_t *offp) +{ +	struct pipe_inode_info *pipe = i->pipe; +	size_t off; +	int idx; +	ssize_t left; + +	if (unlikely(size > i->count)) +		size = i->count; +	if (unlikely(!size)) +		return 0; + +	left = size; +	data_start(i, &idx, &off); +	*idxp = idx; +	*offp = off; +	if (off) { +		left -= PAGE_SIZE - off; +		if (left <= 0) { +			pipe->bufs[idx].len += size; +			return size; +		} +		pipe->bufs[idx].len = PAGE_SIZE; +		idx = next_idx(idx, pipe); +	} +	while (idx != pipe->curbuf || !pipe->nrbufs) { +		struct page *page = alloc_page(GFP_USER); +		if (!page) +			break; +		pipe->nrbufs++; +		pipe->bufs[idx].ops = &default_pipe_buf_ops; +		pipe->bufs[idx].page = page; +		pipe->bufs[idx].offset = 0; +		if (left <= PAGE_SIZE) { +			pipe->bufs[idx].len = left; +			return size; +		} +		pipe->bufs[idx].len = PAGE_SIZE; +		left -= PAGE_SIZE; +		idx = next_idx(idx, pipe); +	} +	return size - left; +} + +static size_t copy_pipe_to_iter(const void *addr, size_t bytes, +				struct iov_iter *i) +{ +	struct pipe_inode_info *pipe = i->pipe; +	size_t n, off; +	int idx; + +	if (!sanity(i)) +		return 0; + +	bytes = n = push_pipe(i, bytes, &idx, &off); +	if (unlikely(!n)) +		return 0; +	for ( ; n; idx = next_idx(idx, pipe), off = 0) { +		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); +		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); +		i->idx = idx; +		i->iov_offset = off + chunk; +		n -= chunk; +		addr += chunk; +	} +	i->count -= bytes; +	return bytes; +} +  size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)  {  	const char *from = addr; +	if (unlikely(i->type & ITER_PIPE)) +		return copy_pipe_to_iter(addr, bytes, i);  	iterate_and_advance(i, bytes, v,  		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,  			       v.iov_len), @@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter);  size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)  {  	char *to = addr; +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1); +		return 0; +	}  	iterate_and_advance(i, bytes, v,  		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,  				 v.iov_len), @@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter);  size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)  {  	char *to = addr; +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1); +		return 0; +	}  	iterate_and_advance(i, bytes, v,  		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,  					 v.iov_base, v.iov_len), @@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,  		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);  		kunmap_atomic(kaddr);  		return wanted; -	} else +	} else if (likely(!(i->type & ITER_PIPE)))  		return copy_page_to_iter_iovec(page, offset, bytes, i); +	else +		return copy_page_to_iter_pipe(page, offset, bytes, i);  }  EXPORT_SYMBOL(copy_page_to_iter);  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,  			 struct iov_iter *i)  
{ +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1); +		return 0; +	}  	if (i->type & (ITER_BVEC|ITER_KVEC)) {  		void *kaddr = kmap_atomic(page);  		size_t wanted = copy_from_iter(kaddr + offset, bytes, i); @@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,  }  EXPORT_SYMBOL(copy_page_from_iter); +static size_t pipe_zero(size_t bytes, struct iov_iter *i) +{ +	struct pipe_inode_info *pipe = i->pipe; +	size_t n, off; +	int idx; + +	if (!sanity(i)) +		return 0; + +	bytes = n = push_pipe(i, bytes, &idx, &off); +	if (unlikely(!n)) +		return 0; + +	for ( ; n; idx = next_idx(idx, pipe), off = 0) { +		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); +		memzero_page(pipe->bufs[idx].page, off, chunk); +		i->idx = idx; +		i->iov_offset = off + chunk; +		n -= chunk; +	} +	i->count -= bytes; +	return bytes; +} +  size_t iov_iter_zero(size_t bytes, struct iov_iter *i)  { +	if (unlikely(i->type & ITER_PIPE)) +		return pipe_zero(bytes, i);  	iterate_and_advance(i, bytes, v,  		__clear_user(v.iov_base, v.iov_len),  		memzero_page(v.bv_page, v.bv_offset, v.bv_len), @@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,  		struct iov_iter *i, unsigned long offset, size_t bytes)  {  	char *kaddr = kmap_atomic(page), *p = kaddr + offset; +	if (unlikely(i->type & ITER_PIPE)) { +		kunmap_atomic(kaddr); +		WARN_ON(1); +		return 0; +	}  	iterate_all_kinds(i, bytes, v,  		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,  					  v.iov_base, v.iov_len), @@ -455,8 +679,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,  }  EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); +static void pipe_advance(struct iov_iter *i, size_t size) +{ +	struct pipe_inode_info *pipe = i->pipe; +	struct pipe_buffer *buf; +	int idx = i->idx; +	size_t off = i->iov_offset; +	 +	if (unlikely(i->count < size)) +		size = i->count; + +	if (size) { +		if (off) /* make it relative to the beginning of buffer */ +			size += off - pipe->bufs[idx].offset; +		while (1) { +			buf = &pipe->bufs[idx]; +			if (size <= buf->len) +				break; +			size -= buf->len; +			idx = next_idx(idx, pipe); +		} +		buf->len = size; +		i->idx = idx; +		off = i->iov_offset = buf->offset + size; +	} +	if (off) +		idx = next_idx(idx, pipe); +	if (pipe->nrbufs) { +		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); +		/* [curbuf,unused) is in use.  
Free [idx,unused) */ +		while (idx != unused) { +			pipe_buf_release(pipe, &pipe->bufs[idx]); +			idx = next_idx(idx, pipe); +			pipe->nrbufs--; +		} +	} +} +  void iov_iter_advance(struct iov_iter *i, size_t size)  { +	if (unlikely(i->type & ITER_PIPE)) { +		pipe_advance(i, size); +		return; +	}  	iterate_and_advance(i, size, v, 0, 0, 0)  }  EXPORT_SYMBOL(iov_iter_advance); @@ -466,6 +731,8 @@ EXPORT_SYMBOL(iov_iter_advance);   */  size_t iov_iter_single_seg_count(const struct iov_iter *i)  { +	if (unlikely(i->type & ITER_PIPE)) +		return i->count;	// it is a silly place, anyway  	if (i->nr_segs == 1)  		return i->count;  	else if (i->type & ITER_BVEC) @@ -501,6 +768,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,  }  EXPORT_SYMBOL(iov_iter_bvec); +void iov_iter_pipe(struct iov_iter *i, int direction, +			struct pipe_inode_info *pipe, +			size_t count) +{ +	BUG_ON(direction != ITER_PIPE); +	i->type = direction; +	i->pipe = pipe; +	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); +	i->iov_offset = 0; +	i->count = count; +} +EXPORT_SYMBOL(iov_iter_pipe); +  unsigned long iov_iter_alignment(const struct iov_iter *i)  {  	unsigned long res = 0; @@ -509,6 +789,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)  	if (!size)  		return 0; +	if (unlikely(i->type & ITER_PIPE)) { +		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) +			return size | i->iov_offset; +		return size; +	}  	iterate_all_kinds(i, size, v,  		(res |= (unsigned long)v.iov_base | v.iov_len, 0),  		res |= v.bv_offset | v.bv_len, @@ -525,6 +810,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)  	if (!size)  		return 0; +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1); +		return ~0U; +	} +  	iterate_all_kinds(i, size, v,  		(res |= (!res ? 0 : (unsigned long)v.iov_base) |  			(size != v.iov_len ? 
size : 0), 0), @@ -537,6 +827,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)  }  EXPORT_SYMBOL(iov_iter_gap_alignment); +static inline size_t __pipe_get_pages(struct iov_iter *i, +				size_t maxsize, +				struct page **pages, +				int idx, +				size_t *start) +{ +	struct pipe_inode_info *pipe = i->pipe; +	size_t n = push_pipe(i, maxsize, &idx, start); +	if (!n) +		return -EFAULT; + +	maxsize = n; +	n += *start; +	while (n >= PAGE_SIZE) { +		get_page(*pages++ = pipe->bufs[idx].page); +		idx = next_idx(idx, pipe); +		n -= PAGE_SIZE; +	} + +	return maxsize; +} + +static ssize_t pipe_get_pages(struct iov_iter *i, +		   struct page **pages, size_t maxsize, unsigned maxpages, +		   size_t *start) +{ +	unsigned npages; +	size_t capacity; +	int idx; + +	if (!sanity(i)) +		return -EFAULT; + +	data_start(i, &idx, start); +	/* some of this one + all after this one */ +	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; +	capacity = min(npages,maxpages) * PAGE_SIZE - *start; + +	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); +} +  ssize_t iov_iter_get_pages(struct iov_iter *i,  		   struct page **pages, size_t maxsize, unsigned maxpages,  		   size_t *start) @@ -547,6 +878,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,  	if (!maxsize)  		return 0; +	if (unlikely(i->type & ITER_PIPE)) +		return pipe_get_pages(i, pages, maxsize, maxpages, start);  	iterate_all_kinds(i, maxsize, v, ({  		unsigned long addr = (unsigned long)v.iov_base;  		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -582,6 +915,37 @@ static struct page **get_pages_array(size_t n)  	return p;  } +static ssize_t pipe_get_pages_alloc(struct iov_iter *i, +		   struct page ***pages, size_t maxsize, +		   size_t *start) +{ +	struct page **p; +	size_t n; +	int idx; +	int npages; + +	if (!sanity(i)) +		return -EFAULT; + +	data_start(i, &idx, start); +	/* some of this one + all after this one */ +	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; +	n = npages * PAGE_SIZE - *start; +	if (maxsize > n) +		maxsize = n; +	else +		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); +	p = get_pages_array(npages); +	if (!p) +		return -ENOMEM; +	n = __pipe_get_pages(i, maxsize, p, idx, start); +	if (n > 0) +		*pages = p; +	else +		kvfree(p); +	return n; +} +  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,  		   struct page ***pages, size_t maxsize,  		   size_t *start) @@ -594,6 +958,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,  	if (!maxsize)  		return 0; +	if (unlikely(i->type & ITER_PIPE)) +		return pipe_get_pages_alloc(i, pages, maxsize, start);  	iterate_all_kinds(i, maxsize, v, ({  		unsigned long addr = (unsigned long)v.iov_base;  		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -635,6 +1001,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,  	__wsum sum, next;  	size_t off = 0;  	sum = *csum; +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1); +		return 0; +	}  	iterate_and_advance(i, bytes, v, ({  		int err = 0;  		next = csum_and_copy_from_user(v.iov_base,  @@ -673,6 +1043,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,  	__wsum sum, next;  	size_t off = 0;  	sum = *csum; +	if (unlikely(i->type & ITER_PIPE)) { +		WARN_ON(1);	/* for now */ +		return 0; +	}  	iterate_and_advance(i, bytes, v, ({  		int err = 0;  		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, @@ -712,7 +1086,20 @@ int iov_iter_npages(const struct iov_iter *i, int 
maxpages)  	if (!size)  		return 0; -	iterate_all_kinds(i, size, v, ({ +	if (unlikely(i->type & ITER_PIPE)) { +		struct pipe_inode_info *pipe = i->pipe; +		size_t off; +		int idx; + +		if (!sanity(i)) +			return 0; + +		data_start(i, &idx, &off); +		/* some of this one + all after this one */ +		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; +		if (npages >= maxpages) +			return maxpages; +	} else iterate_all_kinds(i, size, v, ({  		unsigned long p = (unsigned long)v.iov_base;  		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)  			- p / PAGE_SIZE; @@ -737,6 +1124,10 @@ EXPORT_SYMBOL(iov_iter_npages);  const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)  {  	*new = *old; +	if (unlikely(new->type & ITER_PIPE)) { +		WARN_ON(1); +		return NULL; +	}  	if (new->type & ITER_BVEC)  		return new->bvec = kmemdup(new->bvec,  				    new->nr_segs * sizeof(struct bio_vec), diff --git a/mm/shmem.c b/mm/shmem.c index 971fc83e6402..d86b5e455fef 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)  	return retval ? retval : error;  } -static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, -				struct pipe_inode_info *pipe, size_t len, -				unsigned int flags) -{ -	struct address_space *mapping = in->f_mapping; -	struct inode *inode = mapping->host; -	unsigned int loff, nr_pages, req_pages; -	struct page *pages[PIPE_DEF_BUFFERS]; -	struct partial_page partial[PIPE_DEF_BUFFERS]; -	struct page *page; -	pgoff_t index, end_index; -	loff_t isize, left; -	int error, page_nr; -	struct splice_pipe_desc spd = { -		.pages = pages, -		.partial = partial, -		.nr_pages_max = PIPE_DEF_BUFFERS, -		.flags = flags, -		.ops = &page_cache_pipe_buf_ops, -		.spd_release = spd_release_page, -	}; - -	isize = i_size_read(inode); -	if (unlikely(*ppos >= isize)) -		return 0; - -	left = isize - *ppos; -	if (unlikely(left < len)) -		len = left; - -	if (splice_grow_spd(pipe, &spd)) -		return -ENOMEM; - -	index = *ppos >> PAGE_SHIFT; -	loff = *ppos & ~PAGE_MASK; -	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; -	nr_pages = min(req_pages, spd.nr_pages_max); - -	spd.nr_pages = find_get_pages_contig(mapping, index, -						nr_pages, spd.pages); -	index += spd.nr_pages; -	error = 0; - -	while (spd.nr_pages < nr_pages) { -		error = shmem_getpage(inode, index, &page, SGP_CACHE); -		if (error) -			break; -		unlock_page(page); -		spd.pages[spd.nr_pages++] = page; -		index++; -	} - -	index = *ppos >> PAGE_SHIFT; -	nr_pages = spd.nr_pages; -	spd.nr_pages = 0; - -	for (page_nr = 0; page_nr < nr_pages; page_nr++) { -		unsigned int this_len; - -		if (!len) -			break; - -		this_len = min_t(unsigned long, len, PAGE_SIZE - loff); -		page = spd.pages[page_nr]; - -		if (!PageUptodate(page) || page->mapping != mapping) { -			error = shmem_getpage(inode, index, &page, SGP_CACHE); -			if (error) -				break; -			unlock_page(page); -			put_page(spd.pages[page_nr]); -			spd.pages[page_nr] = page; -		} - -		isize = i_size_read(inode); -		end_index = (isize - 1) >> PAGE_SHIFT; -		if (unlikely(!isize || index > end_index)) -			break; - -		if (end_index == index) { -			unsigned int plen; - -			plen = ((isize - 1) & ~PAGE_MASK) + 1; -			if (plen <= loff) -				break; - -			this_len = min(this_len, plen - loff); -			len = this_len; -		} - -		spd.partial[page_nr].offset = loff; -		spd.partial[page_nr].len = this_len; -		len -= this_len; -		loff = 0; -		spd.nr_pages++; -		index++; -	} - -	while (page_nr < nr_pages) -		
put_page(spd.pages[page_nr++]); - -	if (spd.nr_pages) -		error = splice_to_pipe(pipe, &spd); - -	splice_shrink_spd(&spd); - -	if (error > 0) { -		*ppos += error; -		file_accessed(in); -	} -	return error; -} -  /*   * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.   */ @@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = {  	.read_iter	= shmem_file_read_iter,  	.write_iter	= generic_file_write_iter,  	.fsync		= noop_fsync, -	.splice_read	= shmem_file_splice_read, +	.splice_read	= generic_file_splice_read,  	.splice_write	= iter_file_splice_write,  	.fallocate	= shmem_fallocate,  #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cbd19d250947..1e3e0087245b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,  	return false;  } -ssize_t skb_socket_splice(struct sock *sk, -			  struct pipe_inode_info *pipe, -			  struct splice_pipe_desc *spd) -{ -	int ret; - -	/* Drop the socket lock, otherwise we have reverse -	 * locking dependencies between sk_lock and i_mutex -	 * here as compared to sendfile(). We enter here -	 * with the socket lock held, and splice_to_pipe() will -	 * grab the pipe inode lock. For sendfile() emulation, -	 * we call into ->sendpage() with the i_mutex lock held -	 * and networking will grab the socket lock. -	 */ -	release_sock(sk); -	ret = splice_to_pipe(pipe, spd); -	lock_sock(sk); - -	return ret; -} -  /*   * Map data from the skb to a pipe. Should handle both the linear part,   * the fragments, and the frag list.   */  int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,  		    struct pipe_inode_info *pipe, unsigned int tlen, -		    unsigned int flags, -		    ssize_t (*splice_cb)(struct sock *, -					 struct pipe_inode_info *, -					 struct splice_pipe_desc *)) +		    unsigned int flags)  {  	struct partial_page partial[MAX_SKB_FRAGS];  	struct page *pages[MAX_SKB_FRAGS]; @@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,  	__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);  	if (spd.nr_pages) -		ret = splice_cb(sk, pipe, &spd); +		ret = splice_to_pipe(pipe, &spd);  	return ret;  } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f253e5019d22..2414b7c80b87 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,  	int ret;  	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, -			      min(rd_desc->count, len), tss->flags, -			      skb_socket_splice); +			      min(rd_desc->count, len), tss->flags);  	if (ret > 0)  		rd_desc->count -= ret;  	return ret; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index b7f869a85ab7..7e08a4d3d77d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1160,19 +1160,6 @@ out:  	return copied ? 
: err;  } -static ssize_t kcm_sock_splice(struct sock *sk, -			       struct pipe_inode_info *pipe, -			       struct splice_pipe_desc *spd) -{ -	int ret; - -	release_sock(sk); -	ret = splice_to_pipe(pipe, spd); -	lock_sock(sk); - -	return ret; -} -  static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,  			       struct pipe_inode_info *pipe, size_t len,  			       unsigned int flags) @@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,  	if (len > rxm->full_len)  		len = rxm->full_len; -	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags, -				 kcm_sock_splice); +	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);  	if (copied < 0) {  		err = copied;  		goto err_out; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8309687a56b0..145082e2ba36 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,  	return unix_stream_read_generic(&state);  } -static ssize_t skb_unix_socket_splice(struct sock *sk, -				      struct pipe_inode_info *pipe, -				      struct splice_pipe_desc *spd) -{ -	int ret; -	struct unix_sock *u = unix_sk(sk); - -	mutex_unlock(&u->iolock); -	ret = splice_to_pipe(pipe, spd); -	mutex_lock(&u->iolock); - -	return ret; -} -  static int unix_stream_splice_actor(struct sk_buff *skb,  				    int skip, int chunk,  				    struct unix_stream_read_state *state)  {  	return skb_splice_bits(skb, state->socket->sk,  			       UNIXCB(skb).consumed + skip, -			       state->pipe, chunk, state->splice_flags, -			       skb_unix_socket_splice); +			       state->pipe, chunk, state->splice_flags);  }  static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos, | 
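/*
 * Hedged sketch, not the fs/splice.c code from this series: roughly how a
 * ->splice_read() implementation can drive the new ITER_PIPE iterator using
 * only the helpers whose prototypes appear in the hunks above.  The pipe is
 * wrapped in an iov_iter with iov_iter_pipe(), handed to ->read_iter(), and
 * copy_page_to_iter()/copy_to_iter() then land the data in pipe buffers via
 * the ITER_PIPE branches added in lib/iov_iter.c.  Note that ITER_PIPE
 * iterators are read-side only (the copy_from_iter() paths WARN and bail).
 * demo_splice_read is an illustrative name; pipe-space accounting and error
 * handling are omitted.
 */
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/pipe_fs_i.h>

static ssize_t demo_splice_read(struct file *in, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	struct iov_iter to;
	struct kiocb kiocb;
	ssize_t ret;

	iov_iter_pipe(&to, ITER_PIPE, pipe, len);
	init_sync_kiocb(&kiocb, in);
	kiocb.ki_pos = *ppos;
	ret = in->f_op->read_iter(&kiocb, &to);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}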
