From 87374179c535a98337569904727aa02f960fe79e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:15 +0200 Subject: block: add a proper block layer data direction encoding Currently the block layer op_is_write, bio_data_dir and rq_data_dir helper treat every operation that is not a READ as a data out operation. This worked surprisingly long, but the new REQ_OP_ZONE_REPORT operation actually adds a second operation that reads data from the device. Surprisingly nothing critical relied on this direction, but this might be a good opportunity to properly fix this issue up. We take a little inspiration and use the least significant bit of the operation number to encode the data direction, which just requires us to renumber the operations to fix this scheme. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d6..e3e878f12b25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2499,11 +2499,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -static inline bool op_is_write(unsigned int op) -{ - return op == REQ_OP_READ ? false : true; -} - /* * return data direction, READ or WRITE */ -- cgit v1.2.3 From 6f6b29171a192e84b666c816e49d2175afbbb09f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:07 -0600 Subject: block: don't use REQ_SYNC in the READ_SYNC definition Reads are synchronous per definition, don't add another flag for it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3e878f12b25..5e0078fceed7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -196,7 +196,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC REQ_SYNC +#define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC #define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -- cgit v1.2.3 From b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:08 -0600 Subject: block: treat REQ_FUA and REQ_PREFLUSH as synchronous MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of requiring everyone to specify the REQ_SYNC flag aѕ well. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 +++++++- include/linux/fs.h | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3fa62cabe8d2..107d23d18096 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -216,9 +216,15 @@ static inline bool op_is_write(unsigned int op) return (op & 1); } +/* + * Reads are always treated as synchronous, as are requests with the FUA or + * PREFLUSH flag. Other operations may be marked as synchronous using the + * REQ_SYNC flag. + */ static inline bool op_is_sync(unsigned int op) { - return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); + return (op & REQ_OP_MASK) == REQ_OP_READ || + (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } typedef unsigned int blk_qc_t; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e0078fceed7..ccedccb28ec8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -199,9 +199,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) +#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what -- cgit v1.2.3 From a2b809672ee6fcb4d5756ea815725b3dbaea654e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:09 -0600 Subject: block: replace REQ_NOIDLE with REQ_IDLE Noidle should be the default for writes as seen by all the compounds definitions in fs.h using it. In fact only direct I/O really should be using NODILE, so turn the whole flag around to get the defaults right, which will make our life much easier especially onces the WRITE_* defines go away. This assumes all the existing "raw" users of REQ_SYNC for writes want noidle behavior, which seems to be spot on from a quick audit. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/cfq-iosched.txt | 32 ++++++++++++++++---------------- block/cfq-iosched.c | 11 ++++++++--- drivers/block/drbd/drbd_actlog.c | 2 +- include/linux/blk_types.h | 4 ++-- include/linux/fs.h | 10 +++++----- include/trace/events/f2fs.h | 2 +- 6 files changed, 33 insertions(+), 28 deletions(-) (limited to 'include/linux/fs.h') diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index 1e4f835a659d..895bd3813115 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -240,11 +240,11 @@ All cfq queues doing synchronous sequential IO go on to sync-idle tree. On this tree we idle on each queue individually. All synchronous non-sequential queues go on sync-noidle tree. Also any -request which are marked with REQ_NOIDLE go on this service tree. On this -tree we do not idle on individual queues instead idle on the whole group -of queues or the tree. So if there are 4 queues waiting for IO to dispatch -we will idle only once last queue has dispatched the IO and there is -no more IO on this service tree. +synchronous write request which is not marked with REQ_IDLE goes on this +service tree. On this tree we do not idle on individual queues instead idle +on the whole group of queues or the tree. So if there are 4 queues waiting +for IO to dispatch we will idle only once last queue has dispatched the IO +and there is no more IO on this service tree. All async writes go on async service tree. There is no idling on async queues. @@ -257,17 +257,17 @@ tree idling provides isolation with buffered write queues on async tree. FAQ === -Q1. Why to idle at all on queues marked with REQ_NOIDLE. +Q1. Why to idle at all on queues not marked with REQ_IDLE. -A1. We only do tree idle (all queues on sync-noidle tree) on queues marked - with REQ_NOIDLE. This helps in providing isolation with all the sync-idle +A1. We only do tree idle (all queues on sync-noidle tree) on queues not marked + with REQ_IDLE. This helps in providing isolation with all the sync-idle queues. Otherwise in presence of many sequential readers, other synchronous IO might not get fair share of disk. For example, if there are 10 sequential readers doing IO and they get - 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled - roughly after 1 second. If after completion of REQ_NOIDLE request we - do not idle, and after a couple of milli seconds a another REQ_NOIDLE + 100ms each. If a !REQ_IDLE request comes in, it will be scheduled + roughly after 1 second. If after completion of !REQ_IDLE request we + do not idle, and after a couple of milli seconds a another !REQ_IDLE request comes in, again it will be scheduled after 1second. Repeat it and notice how a workload can lose its disk share and suffer due to multiple sequential readers. @@ -276,16 +276,16 @@ A1. We only do tree idle (all queues on sync-noidle tree) on queues marked context of fsync, and later some journaling data is written. Journaling data comes in only after fsync has finished its IO (atleast for ext4 that seemed to be the case). Now if one decides not to idle on fsync - thread due to REQ_NOIDLE, then next journaling write will not get + thread due to !REQ_IDLE, then next journaling write will not get scheduled for another second. A process doing small fsync, will suffer badly in presence of multiple sequential readers. - Hence doing tree idling on threads using REQ_NOIDLE flag on requests + Hence doing tree idling on threads using !REQ_IDLE flag on requests provides isolation from multiple sequential readers and at the same time we do not idle on individual threads. -Q2. When to specify REQ_NOIDLE -A2. I would think whenever one is doing synchronous write and not expecting +Q2. When to specify REQ_IDLE +A2. I would think whenever one is doing synchronous write and expecting more writes to be dispatched from same context soon, should be able - to specify REQ_NOIDLE on writes and that probably should work well for + to specify REQ_IDLE on writes and that probably should work well for most of the cases. diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f28db97c3fe0..dcbed8c9c82c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3914,6 +3914,12 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); } +static inline bool req_noidle(struct request *req) +{ + return req_op(req) == REQ_OP_WRITE && + (req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC; +} + /* * Disable idle window if the process thinks too long or seeks so much that * it doesn't matter @@ -3935,7 +3941,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq->queued[0] + cfqq->queued[1] >= 4) cfq_mark_cfqq_deep(cfqq); - if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) + if (cfqq->next_rq && req_noidle(cfqq->next_rq)) enable_idle = 0; else if (!atomic_read(&cic->icq.ioc->active_ref) || !cfqd->cfq_slice_idle || @@ -4220,8 +4226,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) const int sync = rq_is_sync(rq); u64 now = ktime_get_ns(); - cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", - !!(rq->cmd_flags & REQ_NOIDLE)); + cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq)); cfq_update_hw_tag(cfqd); diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 2d3d50ab74bf..8d7bcfa49c12 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -148,7 +148,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) op_flags |= REQ_FUA | REQ_PREFLUSH; - op_flags |= REQ_SYNC | REQ_NOIDLE; + op_flags |= REQ_SYNC; bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 107d23d18096..63b750a3b165 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -175,7 +175,7 @@ enum req_flag_bits { __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_IDLE, /* anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ @@ -190,7 +190,7 @@ enum req_flag_bits { #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_NOIDLE (1ULL << __REQ_NOIDLE) +#define REQ_IDLE (1ULL << __REQ_IDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index ccedccb28ec8..46a74209917f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -197,11 +197,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define WRITE REQ_OP_WRITE #define READ_SYNC 0 -#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_SYNC REQ_SYNC +#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) +#define WRITE_FLUSH REQ_PREFLUSH +#define WRITE_FUA REQ_FUA +#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 903a09165bb1..a9d34424450d 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -32,7 +32,7 @@ TRACE_DEFINE_ENUM(LFS); TRACE_DEFINE_ENUM(SSR); TRACE_DEFINE_ENUM(__REQ_RAHEAD); TRACE_DEFINE_ENUM(__REQ_SYNC); -TRACE_DEFINE_ENUM(__REQ_NOIDLE); +TRACE_DEFINE_ENUM(__REQ_IDLE); TRACE_DEFINE_ENUM(__REQ_PREFLUSH); TRACE_DEFINE_ENUM(__REQ_FUA); TRACE_DEFINE_ENUM(__REQ_PRIO); -- cgit v1.2.3 From 70fd76140a6cb63262bd47b68d57b42e889c10ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:10 -0600 Subject: block,fs: use REQ_* flags directly Remove the WRITE_* and READ_SYNC wrappers, and just use the flags directly. Where applicable this also drops usage of the bio_set_op_attrs wrapper. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/xen-blkback/blkback.c | 10 ++++---- drivers/md/bcache/btree.c | 4 ++-- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-log.c | 2 +- drivers/md/dm-raid1.c | 4 ++-- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm.c | 2 +- drivers/md/md.c | 4 ++-- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/nvme/target/io-cmd.c | 4 ++-- drivers/target/target_core_iblock.c | 8 +++---- fs/btrfs/disk-io.c | 6 ++--- fs/btrfs/extent_io.c | 16 ++++++------- fs/btrfs/inode.c | 6 ++--- fs/btrfs/scrub.c | 2 +- fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 2 +- fs/buffer.c | 8 +++---- fs/direct-io.c | 2 +- fs/ext4/mmp.c | 6 ++--- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 16 ++++++------- fs/f2fs/gc.c | 6 ++--- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 8 +++---- fs/f2fs/super.c | 2 +- fs/gfs2/log.c | 4 ++-- fs/gfs2/meta_io.c | 6 ++--- fs/gfs2/ops_fstype.c | 2 +- fs/hfsplus/super.c | 4 ++-- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 9 +++---- fs/jbd2/journal.c | 15 ++++++------ fs/jbd2/revoke.c | 2 +- fs/jfs/jfs_logmgr.c | 4 ++-- fs/mpage.c | 6 ++--- fs/nilfs2/super.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/reiserfs/journal.c | 6 +++-- fs/xfs/xfs_aops.c | 11 +++++---- fs/xfs/xfs_buf.c | 2 +- include/linux/fs.h | 47 ------------------------------------- include/trace/events/f2fs.h | 10 ++++---- kernel/power/swap.c | 19 +++++++-------- 53 files changed, 133 insertions(+), 182 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/blk-flush.c b/block/blk-flush.c index 95f1d4d357df..d35beca18481 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH; + flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -486,7 +486,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_bdev = bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 942384f34e22..a89538cb3eaa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio->bi_bdev = device->ldev->backing_bdev; bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4a80ee752597..726c32e35db9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, case BLKIF_OP_WRITE: ring->st_wr_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_ODIRECT; + operation_flags = REQ_SYNC | REQ_IDLE; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_FLUSH; + operation_flags = REQ_PREFLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, } /* Wait on all outstanding I/O's and once that has been completed - * issue the WRITE_FLUSH. + * issue the flush. */ if (drain) xen_blk_drain_io(pending_req->ring); @@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation_flags != WRITE_FLUSH); + BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 81d3db40cd7b..6fdd8e252760 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -297,7 +297,7 @@ static void bch_btree_node_read(struct btree *b) bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9; bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, b->keys.set[0].data); @@ -393,7 +393,7 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = cl; b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); - bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA); + b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA; bch_bio_map(b->bio, i); /* diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 333a1e5f6ae6..1c9130ae0073 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -52,7 +52,7 @@ void bch_btree_verify(struct btree *b) bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, sorted); submit_bio_wait(bio); @@ -113,7 +113,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) check = bio_clone(bio, GFP_NOIO); if (!check) return; - bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC); + check->bi_opf = REQ_OP_READ; if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e8a2b693c928..0d99b5f4b3e6 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -923,7 +923,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) flush->bi_bdev = bio->bi_bdev; flush->bi_end_io = request_endio; flush->bi_private = cl; - bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH); + flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; closure_bio_submit(flush, cl); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 849ad441cd76..988edf928466 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -381,7 +381,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl); + uuid_io(c, REQ_OP_READ, 0, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -600,7 +600,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, REQ_OP_READ, READ_SYNC); + prio_io(ca, bucket, REQ_OP_READ, 0); if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 125aedc3875f..b3ba142e59a4 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1316,7 +1316,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 07fc1ad42ec5..33e71ea6cc14 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) }; lc->io_req.bi_op = REQ_OP_WRITE; - lc->io_req.bi_op_flags = WRITE_FLUSH; + lc->io_req.bi_op_flags = REQ_PREFLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index bdf1606f67bc..1a176d7c8b90 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -657,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_opf & WRITE_FLUSH_FUA, + .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH), .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index b8cf956b577b..b93476c3ba3f 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -741,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA)) ps->valid = 0; /* @@ -818,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA); + r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); if (r < 0) return r; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 147af9536d0c..b2abfa41af3e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1527,7 +1527,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio); md->flush_bio.bi_bdev = md->bdev; - bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; dm_stats_init(&md->stats); diff --git a/drivers/md/md.c b/drivers/md/md.c index eac84d8ff724..b69ec7da4bae 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -394,7 +394,7 @@ static void submit_flushes(struct work_struct *ws) bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; - bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH); + bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); rcu_read_lock(); @@ -743,7 +743,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 1b1ab4a1d132..28d015c6fffe 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -685,7 +685,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) bio_reset(&log->flush_bio); log->flush_bio.bi_bdev = log->rdev->bdev; log->flush_bio.bi_end_io = r5l_log_flush_endio; - bio_set_op_attrs(&log->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); } @@ -1053,7 +1053,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb->checksum = cpu_to_le32(crc); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, - WRITE_FUA, false)) { + REQ_FUA, false)) { __free_page(page); return -EIO; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ac251e91e6..70acdd379e44 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -913,7 +913,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { op = REQ_OP_WRITE; if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; if (test_bit(R5_Discard, &sh->dev[i].flags)) op = REQ_OP_DISCARD; } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4a96c2049b7b..c2784cfc5e29 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -58,7 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; - op_flags = WRITE_ODIRECT; + op_flags = REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { @@ -109,7 +109,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) bio->bi_bdev = req->ns->bdev; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(bio); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 372d744315f3..d316ed537d59 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -388,7 +388,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd) bio = bio_alloc(GFP_KERNEL, 0); bio->bi_end_io = iblock_end_io_flush; bio->bi_bdev = ib_dev->ibd_bd; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; if (!immed) bio->bi_private = cmd; submit_bio(bio); @@ -686,15 +686,15 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd); /* - * Force writethrough using WRITE_FUA if a volatile write cache + * Force writethrough using REQ_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. */ op = REQ_OP_WRITE; if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { if (cmd->se_cmd_flags & SCF_FUA) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; } } else { op = REQ_OP_READ; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c8454a8e35f2..fe10afd51e02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3485,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); else - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); if (ret) errors++; } @@ -3551,7 +3551,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66a755150056..ff87bff7bdb6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -127,7 +127,7 @@ struct extent_page_data { */ unsigned int extent_locked:1; - /* tells the submit_bio code to use a WRITE_SYNC */ + /* tells the submit_bio code to use REQ_SYNC */ unsigned int sync_io:1; }; @@ -2047,7 +2047,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, return -EIO; } bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; bio_add_page(bio, page, length, pg_offset); if (btrfsic_submit_bio_wait(bio)) { @@ -2388,7 +2388,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct inode *inode = page->mapping->host; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct bio *bio; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2404,9 +2404,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, } if (failed_bio->bi_vcnt > 1) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; phy_offset >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, @@ -3484,7 +3482,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = WRITE_SYNC; + write_flags = REQ_SYNC; trace___extent_writepage(page, inode, wbc); @@ -3729,7 +3727,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, unsigned long i, num_pages; unsigned long bio_flags = 0; unsigned long start, end; - int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; + int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -4076,7 +4074,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) int ret; bio_set_op_attrs(epd->bio, REQ_OP_WRITE, - epd->sync_io ? WRITE_SYNC : 0); + epd->sync_io ? REQ_SYNC : 0); ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a377079af26..c8eb82a416b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7917,7 +7917,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec; struct bio *bio; int isector; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -7936,9 +7936,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, if ((failed_bio->bi_vcnt > 1) || (failed_bio->bi_io_vec->bv_len > BTRFS_I(inode)->root->sectorsize)) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; isector >>= inode->i_sb->s_blocksize_bits; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fffb9ab8526e..ff3078234d94 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4440,7 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { leave_with_eio: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index deda46cf1292..0d7d635d8bfb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6023,7 +6023,7 @@ static void btrfs_end_bio(struct bio *bio) else btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH) + if (bio->bi_opf & REQ_PREFLUSH) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_FLUSH_ERRS); btrfs_dev_stat_print_on_error(dev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 09ed29c67848..f137ffe6654c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -62,7 +62,7 @@ struct btrfs_device { int running_pending; /* regular prio bios */ struct btrfs_pending_bios pending_bios; - /* WRITE_SYNC bios */ + /* sync bios */ struct btrfs_pending_bios pending_sync_bios; struct block_device *bdev; diff --git a/fs/buffer.c b/fs/buffer.c index a29335867e30..bc7c2bb30a9b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -753,7 +753,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * still in flight on potentially older * contents. */ - write_dirty_buffer(bh, WRITE_SYNC); + write_dirty_buffer(bh, REQ_SYNC); /* * Kick off IO for the previous mapping. Note @@ -1684,7 +1684,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * * prevents this contention from occurring. * * If block_write_full_page() is called with wbc->sync_mode == - * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this + * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ int __block_write_full_page(struct inode *inode, struct page *page, @@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -3210,7 +3210,7 @@ EXPORT_SYMBOL(__sync_dirty_buffer); int sync_dirty_buffer(struct buffer_head *bh) { - return __sync_dirty_buffer(bh, WRITE_SYNC); + return __sync_dirty_buffer(bh, REQ_SYNC); } EXPORT_SYMBOL(sync_dirty_buffer); diff --git a/fs/direct-io.c b/fs/direct-io.c index fb9aa16a7727..a5138c564019 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1209,7 +1209,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->inode = inode; if (iov_iter_rw(iter) == WRITE) { dio->op = REQ_OP_WRITE; - dio->op_flags = WRITE_ODIRECT; + dio->op_flags = REQ_SYNC | REQ_IDLE; } else { dio->op = REQ_OP_READ; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index d89754ef1aab..eb9835638680 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) } /* - * Write the MMP block using WRITE_SYNC to try to get the block on-disk + * Write the MMP block using REQ_SYNC to try to get the block on-disk * faster. */ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) @@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, get_bh(*bh); lock_buffer(*bh); (*bh)->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { ret = -EIO; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..e0b3b54cdef3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0; + REQ_SYNC : 0; bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6db81fbcbaa6..f31eb286af90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4553,7 +4553,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) unlock_buffer(sbh); if (sync) { error = __sync_dirty_buffer(sbh, - test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); + test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e9b504bd8b2..d935c06a84f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = READ_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_META | REQ_PRIO, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, @@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, + .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ae194fd2fdb..b80bf10603d7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -198,11 +198,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - if (test_opt(sbi, NOBARRIER)) - io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO; - else - io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META | - REQ_PRIO; + io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; + if (!test_opt(sbi, NOBARRIER)) + io->fio.op_flags |= REQ_FUA; } __submit_merged_bio(io); out: @@ -483,7 +481,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, READ_SYNC, false); + page = get_read_data_page(inode, index, 0, false); if (IS_ERR(page)) return page; @@ -509,7 +507,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, READ_SYNC, for_write); + page = get_read_data_page(inode, index, 0, for_write); if (IS_ERR(page)) return page; @@ -1251,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; @@ -1663,7 +1661,7 @@ repeat: err = PTR_ERR(bio); goto fail; } - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); err = -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 93985c64d8a8..9eb11b2244ea 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -550,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_READ, - .op_flags = READ_SYNC, + .op_flags = 0, .encrypted_page = NULL, }; struct dnode_of_data dn; @@ -625,7 +625,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_wait_on_page_writeback(dn.node_page, NODE, true); fio.op = REQ_OP_WRITE; - fio.op_flags = WRITE_SYNC; + fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_mbio(&fio); @@ -663,7 +663,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC, + .op_flags = REQ_SYNC, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5f1a67f756af..2e7f54c191b4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -111,7 +111,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .sbi = F2FS_I_SB(dn->inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 01177ecdeab8..932f3f8bb57b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1134,7 +1134,7 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READ_SYNC); + err = read_node_page(page, 0); if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); @@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page, .sbi = sbi, .type = NODE, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fc886f008449..f1b4a1775ebe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -259,7 +259,7 @@ static int __commit_inmem_pages(struct inode *inode, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .encrypted_page = NULL, }; bool submit_bio = false; @@ -420,7 +420,7 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); llist_for_each_entry_safe(cmd, next, @@ -454,7 +454,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); atomic_dec(&fcc->submit_flush); bio_put(bio); @@ -1515,7 +1515,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .sbi = sbi, .type = META, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6132b4ce4e4c..2cac6bb86080 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1238,7 +1238,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e58ccef09c91..27c00a16def0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = WRITE_FLUSH_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); @@ -682,7 +682,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); - op_flags = WRITE_SYNC | REQ_META | REQ_PRIO; + op_flags = REQ_SYNC | REQ_META | REQ_PRIO; } sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 373639a59782..e562b1191c9c 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -38,7 +38,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb struct buffer_head *bh, *head; int nr_underway = 0; int write_flags = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -285,7 +285,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; @@ -453,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ff72ac6439c8..a34308df927f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -246,7 +246,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META); + bio_set_op_attrs(bio, REQ_OP_READ, REQ_META); submit_bio(bio); wait_on_page_locked(page); bio_put(bio); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 11854dd84572..67aedf4c2e7c 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error = error2; if (!write_backup) @@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error2 = error; out: diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 684996c8a3a4..4055f51617ef 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -186,7 +186,7 @@ __flush_batch(journal_t *journal, int *batch_count) blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); + write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC); blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 31f8ca046639..8c514367ba5a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,9 +155,10 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh); + ret = submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); else - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); *cbh = bh; return ret; @@ -402,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_SYNC); + REQ_SYNC); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd_debug(3, "superblock not updated\n"); @@ -717,7 +718,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); } cond_resched(); stats.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 927da4956a89..8ed971eeab44 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -913,7 +913,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); if (ret) goto out; @@ -1306,7 +1306,7 @@ static int journal_reset(journal_t *journal) /* Lock here to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); /* - * Update log tail information. We use WRITE_FUA since new + * Update log tail information. We use REQ_FUA since new * transaction will start reusing journal space and so we * must make sure information about current log tail is on * disk before that. @@ -1314,7 +1314,7 @@ static int journal_reset(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_FUA); + REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1454,7 +1454,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) sb->s_errno = cpu_to_be32(journal->j_errno); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, WRITE_FUA); + jbd2_write_superblock(journal, REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -1720,7 +1720,8 @@ int jbd2_journal_destroy(journal_t *journal) ++journal->j_transaction_sequence; write_unlock(&journal->j_state_lock); - jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA); + jbd2_mark_journal_empty(journal, + REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -1979,7 +1980,7 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); J_ASSERT(!journal->j_running_transaction); @@ -2025,7 +2026,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 91171dc352cb..cfc38b552118 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -648,7 +648,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); - write_dirty_buffer(descriptor, WRITE_SYNC); + write_dirty_buffer(descriptor, REQ_SYNC); } #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a21ea8b3e5fa..bb1da1feafeb 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2002,7 +2002,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; @@ -2146,7 +2146,7 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; /* check if journaling to disk has been disabled */ if (log->no_integrity) { diff --git a/fs/mpage.c b/fs/mpage.c index d2413af0823a..f35e2819d0c6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -705,7 +705,7 @@ mpage_writepages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } } @@ -726,7 +726,7 @@ int mpage_writepage(struct page *page, get_block_t get_block, int ret = __mpage_writepage(page, wbc, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } return ret; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c95d369e90aa..12eeae62a2b1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -189,7 +189,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag) set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], - WRITE_SYNC | WRITE_FLUSH_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 636abcbd4650..52eef16edb01 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -627,7 +627,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, slot = o2nm_this_node(); bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bc2dde2423c2..aa40c242f1db 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1111,7 +1111,8 @@ static int flush_commit_list(struct super_block *s, mark_buffer_dirty(jl->j_commit_bh) ; depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) - __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); + __sync_dirty_buffer(jl->j_commit_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1269,7 +1270,8 @@ static int _update_journal_header_block(struct super_block *sb, depth = reiserfs_write_unlock_nested(sb); if (reiserfs_barrier_flush(sb)) - __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); + __sync_dirty_buffer(journal->j_header_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3e57a56cf829..594e02c485b2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -495,8 +495,10 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; + /* * If we are failing the IO now, just mark the ioend with an * error and finish it. This will run IO completion immediately @@ -567,8 +569,9 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b5b9bffe3520..33c435f3316c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1304,7 +1304,7 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_WRITE) { op = REQ_OP_WRITE; if (bp->b_flags & XBF_SYNCIO) - op_flags = WRITE_SYNC; + op_flags = REQ_SYNC; if (bp->b_flags & XBF_FUA) op_flags |= REQ_FUA; if (bp->b_flags & XBF_FLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index 46a74209917f..7a1b78ab7c15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,58 +151,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -/* - * The below are the various read and write flags that we support. Some of - * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. They should be used along with a req_op. - * Terminology: - * - * The block layer uses device plugging to defer IO a little bit, in - * the hope that we will see more IO very shortly. This increases - * coalescing of adjacent IO and thus reduces the number of IOs we - * have to send to the device. It also allows for better queuing, - * if the IO isn't mergeable. If the caller is going to be waiting - * for the IO, then he must ensure that the device is unplugged so - * that the IO is dispatched to the driver. - * - * All IO is handled async in Linux. This is fine for background - * writes, but for reads or writes that someone waits for completion - * on, we want to notify the block layer and IO scheduler so that they - * know about it. That allows them to make better scheduling - * decisions. So when the below references 'sync' and 'async', it - * is referencing this priority hint. - * - * With that in mind, the available types are: - * - * READ A normal read operation. Device will be plugged. - * READ_SYNC A synchronous read. Device is not plugged, caller can - * immediately wait on this read without caring about - * unplugging. - * WRITE A normal async write. Device will be plugged. - * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down - * the hint that someone will be waiting on this IO - * shortly. The write equivalent of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. - * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. - * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on - * non-volatile media on completion. - * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded - * by a cache flush and data is guaranteed to be on - * non-volatile media on completion. - * - */ #define RW_MASK REQ_OP_WRITE #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC 0 -#define WRITE_SYNC REQ_SYNC -#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) -#define WRITE_FLUSH REQ_PREFLUSH -#define WRITE_FUA REQ_FUA -#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index a9d34424450d..5da2c829a718 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) +#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) #define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ @@ -65,11 +65,9 @@ TRACE_DEFINE_ENUM(CP_DISCARD); __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ { 0, "WRITE" }, \ { REQ_RAHEAD, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }, \ - { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) + { REQ_SYNC, "REQ_SYNC" }, \ + { REQ_PREFLUSH, "REQ_PREFLUSH" }, \ + { REQ_FUA, "REQ_FUA" }) #define show_bio_extra(type) \ __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a3b1e617bcdc..32e0c232efba 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -307,7 +307,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { @@ -317,7 +317,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -397,7 +397,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) } else { src = buf; } - return hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, offset, src, hb); + return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -1000,8 +1000,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, - tmp->map, NULL); + error = hib_submit_io(REQ_OP_READ, 0, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -1025,7 +1024,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, buf, hb); + error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -1534,7 +1533,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_submit_io(REQ_OP_READ, READ_SYNC, + error = hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (error) @@ -1543,7 +1542,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { @@ -1588,11 +1587,11 @@ int swsusp_unmark(void) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { -- cgit v1.2.3 From d38499530e5f170d30f32d3841fade204e63081d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:11 -0600 Subject: fs: decouple READ and WRITE from the block layer ops Move READ and WRITE to kernel.h and don't define them in terms of block layer ops; they are our generic data direction indicators these days and have no more resemblance with the block layer ops. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++++ include/linux/fs.h | 13 ------------- include/linux/kernel.h | 4 ++++ include/linux/uio.h | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/bio.h b/include/linux/bio.h index 87ce64dafb93..fe9a17017608 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -62,6 +62,12 @@ #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) +/* + * Return the data direction, READ or WRITE. + */ +#define bio_data_dir(bio) \ + (op_is_write(bio_op(bio)) ? WRITE : READ) + /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a1b78ab7c15..0ad36e0c7fa7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,11 +151,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -#define RW_MASK REQ_OP_WRITE - -#define READ REQ_OP_READ -#define WRITE REQ_OP_WRITE - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -2452,14 +2447,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -/* - * return data direction, READ or WRITE - */ -static inline int bio_data_dir(struct bio *bio) -{ - return op_is_write(bio_op(bio)) ? WRITE : READ; -} - extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); extern int revalidate_disk(struct gendisk *); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bc6ed52a39b9..01b6b460c34d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -50,6 +50,10 @@ #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/uio.h b/include/linux/uio.h index 6e22b544d039..d5aba1512b8b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i) * * The ?: is just for type safety. */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) /* * Cap the iov_iter by given limit; note that the second argument is -- cgit v1.2.3 From 1e3914d4cf4e14653b7917b0e965217465cb7a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:12 -0600 Subject: block, fs: move submit_bio to bio.h This is where all the other bio operations live, so users must include bio.h anyway. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe9a17017608..5c604b4914bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -404,6 +404,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } +extern blk_qc_t submit_bio(struct bio *); + extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad36e0c7fa7..5b0a9b77534d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2717,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); -- cgit v1.2.3 From 2f8b544477e627a42e66902e948d87f86554aeca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:13 -0600 Subject: block,fs: untangle fs.h and blk_types.h Nothing in fs.h should require blk_types.h to be included. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/9p/vfs_addr.c | 1 + fs/cifs/connect.c | 1 + fs/cifs/transport.c | 1 + fs/gfs2/dir.c | 1 + fs/isofs/compress.c | 1 + fs/ntfs/logfile.c | 1 + fs/ocfs2/buffer_head_io.c | 1 + fs/orangefs/inode.c | 1 + fs/reiserfs/stree.c | 1 + fs/squashfs/block.c | 1 + fs/udf/dir.c | 1 + fs/udf/directory.c | 1 + fs/udf/inode.c | 1 + fs/ufs/balloc.c | 1 + include/linux/fs.h | 2 +- include/linux/swap.h | 1 + include/linux/writeback.h | 2 ++ lib/iov_iter.c | 1 + 18 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6181ad79e1a5..5ca1fb0043f6 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index aab5227979e2..db726e8311ca 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 206a597b2293..5f02edc819af 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 3cdde5f5d399..79113219be5f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 44af14b2e916..9bb2fe35799d 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 761f12f7f3ef..353379ff6057 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "attrib.h" #include "aops.h" diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 8f040f88ade4..d9ebe11c8990 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index ef3b4eb54cf2..551bc74ed2b8 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -8,6 +8,7 @@ * Linux VFS inode operations. */ +#include #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index a97e352d05d3..0037aea97d39 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "reiserfs.h" #include #include diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index ce62a380314f..2751476e6b6e 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/udf/dir.c b/fs/udf/dir.c index aaec13c95253..2d0e028067eb 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 988d5352bdb8..7aa48bd7cbaf 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -16,6 +16,7 @@ #include #include +#include struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct udf_fileident_bh *fibh, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aad46401ede5..0f3db71753aa 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 67e085d591d8..b035af54f538 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "ufs_fs.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b0a9b77534d..8533e9d59c29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -38,6 +37,7 @@ struct backing_dev_info; struct bdi_writeback; +struct bio; struct export_operations; struct hd_geometry; struct iovec; diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..3a6aebc23001 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ #include #include #include +#include #include struct notifier_block; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 797100e10010..e4c38703bf4e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,6 +10,8 @@ #include #include +struct bio; + DECLARE_PER_CPU(int, dirty_throttle_leaks); /* diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..efc953c47572 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1,4 +1,5 @@ #include +#include #include #include #include -- cgit v1.2.3 From ba6379f7e6c7e51b3c0e92672bc61bb6961c2b5e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 12:53:00 +0100 Subject: fs: Provide function to get superblock with exclusive s_umount Quota code will need a variant of get_super_thawed() that returns superblock with s_umount held in exclusive mode to serialize quota on and quota off operations. Provide this functionality. Signed-off-by: Jan Kara --- fs/super.c | 80 ++++++++++++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 2 ++ 2 files changed, 62 insertions(+), 20 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/super.c b/fs/super.c index c183835566c1..f7f724230e2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -558,6 +558,13 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); +void drop_super_exclusive(struct super_block *sb) +{ + up_write(&sb->s_umount); + put_super(sb); +} +EXPORT_SYMBOL(drop_super_exclusive); + /** * iterate_supers - call function for all active superblocks * @f: function to call @@ -628,15 +635,7 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ - -struct super_block *get_super(struct block_device *bdev) +static struct super_block *__get_super(struct block_device *bdev, bool excl) { struct super_block *sb; @@ -651,11 +650,17 @@ rescan: if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (!excl) + down_read(&sb->s_umount); + else + down_write(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & MS_BORN)) return sb; - up_read(&sb->s_umount); + if (!excl) + up_read(&sb->s_umount); + else + up_write(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -666,31 +671,66 @@ rescan: return NULL; } -EXPORT_SYMBOL(get_super); - /** - * get_super_thawed - get thawed superblock of a device + * get_super - get the superblock of a device * @bdev: device to get the superblock for * * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen). %NULL is returned if no match - * is found. + * mounted on the device given. %NULL is returned if no match is found. */ -struct super_block *get_super_thawed(struct block_device *bdev) +struct super_block *get_super(struct block_device *bdev) +{ + return __get_super(bdev, false); +} +EXPORT_SYMBOL(get_super); + +static struct super_block *__get_super_thawed(struct block_device *bdev, + bool excl) { while (1) { - struct super_block *s = get_super(bdev); + struct super_block *s = __get_super(bdev, excl); if (!s || s->s_writers.frozen == SB_UNFROZEN) return s; - up_read(&s->s_umount); + if (!excl) + up_read(&s->s_umount); + else + up_write(&s->s_umount); wait_event(s->s_writers.wait_unfrozen, s->s_writers.frozen == SB_UNFROZEN); put_super(s); } } + +/** + * get_super_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen). %NULL is returned if no match + * is found. + */ +struct super_block *get_super_thawed(struct block_device *bdev) +{ + return __get_super_thawed(bdev, false); +} EXPORT_SYMBOL(get_super_thawed); +/** + * get_super_exclusive_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen) and s_umount semaphore is held + * in exclusive mode. %NULL is returned if no match is found. + */ +struct super_block *get_super_exclusive_thawed(struct block_device *bdev) +{ + return __get_super_thawed(bdev, true); +} +EXPORT_SYMBOL(get_super_exclusive_thawed); + /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..d04cfdefcd11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2949,8 +2949,10 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); extern struct super_block *get_super_thawed(struct block_device *); +extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); +extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); -- cgit v1.2.3 From 640eb7e7b5242af53c456552a526d0080e6333f8 Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Mon, 14 Nov 2016 22:14:35 +0100 Subject: fs: Constify path_is_under()'s arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function path_is_under() doesn't modify the paths pointed by its arguments but only browse them. Constifying this pointers make a cleaner interface to be used by (future) code which may only have access to const struct path pointers (e.g. LSM hooks). Signed-off-by: Mickaël Salaün Cc: Alexander Viro Signed-off-by: Al Viro --- fs/namespace.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namespace.c b/fs/namespace.c index e6c234b1a645..4d80a5066a1f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2997,7 +2997,7 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); } -bool path_is_under(struct path *path1, struct path *path2) +bool path_is_under(const struct path *path1, const struct path *path2) { bool res; read_seqlock_excl(&mount_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..f96501b51c49 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2709,7 +2709,7 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); -extern bool path_is_under(struct path *, struct path *); +extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); -- cgit v1.2.3 From ca71cf71eeda04dc9ad18271504e499013af5415 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Nov 2016 19:45:28 -0500 Subject: namespace.c: constify struct path passed to a bunch of primitives Signed-off-by: Al Viro --- fs/internal.h | 2 +- fs/namespace.c | 8 ++++---- include/linux/fs.h | 2 +- include/linux/mount.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/internal.h b/fs/internal.h index f4da3341b4a3..3e460159d835 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -62,7 +62,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, extern void *copy_mount_options(const void __user *); extern char *copy_mount_string(const void __user *); -extern struct vfsmount *lookup_mnt(struct path *); +extern struct vfsmount *lookup_mnt(const struct path *); extern int finish_automount(struct vfsmount *, struct path *); extern int sb_prepare_remount_readonly(struct super_block *); diff --git a/fs/namespace.c b/fs/namespace.c index 4d80a5066a1f..9ad88a45b3e3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -678,7 +678,7 @@ out: * * lookup_mnt takes a reference to the found vfsmount. */ -struct vfsmount *lookup_mnt(struct path *path) +struct vfsmount *lookup_mnt(const struct path *path) { struct mount *child_mnt; struct vfsmount *m; @@ -1159,7 +1159,7 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -struct vfsmount *mnt_clone_internal(struct path *path) +struct vfsmount *mnt_clone_internal(const struct path *path) { struct mount *p; p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); @@ -1758,7 +1758,7 @@ out: /* Caller should check returned pointer for errors */ -struct vfsmount *collect_mounts(struct path *path) +struct vfsmount *collect_mounts(const struct path *path) { struct mount *tree; namespace_lock(); @@ -1791,7 +1791,7 @@ void drop_collected_mounts(struct vfsmount *mnt) * * Release with mntput(). */ -struct vfsmount *clone_private_mount(struct path *path) +struct vfsmount *clone_private_mount(const struct path *path) { struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; diff --git a/include/linux/fs.h b/include/linux/fs.h index f96501b51c49..3056fe46f336 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2123,7 +2123,7 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct path *); +extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); diff --git a/include/linux/mount.h b/include/linux/mount.h index 1172cce949a4..cf2b5784b649 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -79,12 +79,12 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern struct vfsmount *mnt_clone_internal(struct path *path); +extern struct vfsmount *mnt_clone_internal(const struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); struct path; -extern struct vfsmount *clone_private_mount(struct path *path); +extern struct vfsmount *clone_private_mount(const struct path *path); struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, -- cgit v1.2.3 From f0bb5aaf2c51267c49ed5e2c6103df22acfe30f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Nov 2016 20:27:12 -0500 Subject: vfs: misc struct path constification Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/statfs.c | 2 +- fs/utimes.c | 2 +- include/linux/fs.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index 5b4eed221530..1c8f4386b03f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2895,7 +2895,7 @@ bool may_open_dev(const struct path *path) !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } -static int may_open(struct path *path, int acc_mode, int flag) +static int may_open(const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; @@ -2945,7 +2945,7 @@ static int may_open(struct path *path, int acc_mode, int flag) static int handle_truncate(struct file *filp) { - struct path *path = &filp->f_path; + const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; int error = get_write_access(inode); if (error) diff --git a/fs/statfs.c b/fs/statfs.c index 083dc0ac9140..13ae259d4879 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -63,7 +63,7 @@ static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) return retval; } -int vfs_statfs(struct path *path, struct kstatfs *buf) +int vfs_statfs(const struct path *path, struct kstatfs *buf) { int error; diff --git a/fs/utimes.c b/fs/utimes.c index 22307cdf7014..5fdb505e307c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -48,7 +48,7 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -static int utimes_common(struct path *path, struct timespec *times) +static int utimes_common(const struct path *path, struct timespec *times) { int error; struct iattr newattrs; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3056fe46f336..0e177d395efb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2127,7 +2127,7 @@ extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); -extern int vfs_statfs(struct path *, struct kstatfs *); +extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); extern int vfs_ustat(dev_t, struct kstatfs *); -- cgit v1.2.3 From fd4a0edf2a3d781c6ae07d2810776ce22302ee1c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Dec 2016 16:45:04 +0100 Subject: vfs: replace calling i_op->readlink with vfs_readlink() Also check d_is_symlink() in callers instead of inode->i_op->readlink because following patches will allow NULL ->readlink for symlinks. Signed-off-by: Miklos Szeredi --- fs/namei.c | 21 +++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 8 ++++---- fs/nfsd/vfs.c | 6 ++---- fs/stat.c | 8 +++++--- fs/xfs/xfs_ioctl.c | 4 ++-- include/linux/fs.h | 1 + 6 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index 5b4eed221530..12a4159de72a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4668,6 +4668,27 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) } EXPORT_SYMBOL(generic_readlink); +/** + * vfs_readlink - copy symlink body into userspace buffer + * @dentry: dentry on which to get symbolic link + * @buffer: user memory pointer + * @buflen: size of buffer + * + * Does not touch atime. That's up to the caller if necessary + * + * Does not call security hook. + */ +int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *inode = d_inode(dentry); + + if (!inode->i_op->readlink) + return -EINVAL; + + return inode->i_op->readlink(dentry, buffer, buflen); +} +EXPORT_SYMBOL(vfs_readlink); + /** * vfs_get_link - get symlink body * @dentry: dentry on which to get symbolic link diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c2d2895a1ec1..645e1e3c3110 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3576,10 +3576,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (!p) return nfserr_resource; /* - * XXX: By default, the ->readlink() VFS op will truncate symlinks - * if they would overflow the buffer. Is this kosher in NFSv4? If - * not, one easy fix is: if ->readlink() precisely fills the buffer, - * assume that truncation occurred, and return NFS4ERR_RESOURCE. + * XXX: By default, vfs_readlink() will truncate symlinks if they + * would overflow the buffer. Is this kosher in NFSv4? If not, one + * easy fix is: if vfs_readlink() precisely fills the buffer, assume + * that truncation occurred, and return NFS4ERR_RESOURCE. */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, (char *)p, &maxcount); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ca642fe9b21..b854f02c1c36 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1451,7 +1451,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { - struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; @@ -1463,10 +1462,9 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) path.mnt = fhp->fh_export->ex_path.mnt; path.dentry = fhp->fh_dentry; - inode = d_inode(path.dentry); err = nfserr_inval; - if (!inode->i_op->readlink) + if (!d_is_symlink(path.dentry)) goto out; touch_atime(&path); @@ -1475,7 +1473,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp); + host_err = vfs_readlink(path.dentry, (char __user *)buf, *lenp); set_fs(oldfs); if (host_err < 0) diff --git a/fs/stat.c b/fs/stat.c index bc045c7994e1..0b210c3ead5c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -329,12 +329,14 @@ retry: struct inode *inode = d_backing_inode(path.dentry); error = empty ? -ENOENT : -EINVAL; - if (inode->i_op->readlink) { + /* + * AFS mountpoints allow readlink(2) but are not symlinks + */ + if (d_is_symlink(path.dentry) || inode->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(&path); - error = inode->i_op->readlink(path.dentry, - buf, bufsiz); + error = vfs_readlink(path.dentry, buf, bufsiz); } } path_put(&path); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c245bed3249b..9b12f7c993e9 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -287,7 +287,7 @@ xfs_readlink_by_handle( return PTR_ERR(dentry); /* Restrict this handle operation to symlinks only. */ - if (!d_inode(dentry)->i_op->readlink) { + if (!d_is_symlink(dentry)) { error = -EINVAL; goto out_dput; } @@ -297,7 +297,7 @@ xfs_readlink_by_handle( goto out_dput; } - error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen); + error = vfs_readlink(dentry, hreq->ohandle, olen); out_dput: dput(dentry); diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..eba20d1c068d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2935,6 +2935,7 @@ extern int vfs_lstat(const char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , const char __user *, struct kstat *, int); extern const char *vfs_get_link(struct dentry *, struct delayed_call *); +extern int vfs_readlink(struct dentry *, char __user *, int); extern int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From 76fca90e9f3abc82114d9d02d8e14e0324a18ca2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Dec 2016 16:45:04 +0100 Subject: vfs: default to generic_readlink() If i_op->readlink is NULL, but i_op->get_link is set then vfs_readlink() defaults to calling generic_readlink(). The IOP_DEFAULT_READLINK flag indicates that the above conditions are met and the default action can be taken. Signed-off-by: Miklos Szeredi --- Documentation/filesystems/porting | 4 ++++ Documentation/filesystems/vfs.txt | 9 ++++++--- fs/namei.c | 15 ++++++++++++--- include/linux/fs.h | 1 + 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index bdd025ceb763..95280079c0b3 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -596,3 +596,7 @@ in your dentry operations instead. [mandatory] ->rename() has an added flags argument. Any flags not handled by the filesystem should result in EINVAL being returned. +-- +[recommended] + ->readlink is optional for symlinks. Don't set, unless filesystem needs + to fake something for readlink(2). diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b5039a00caaf..038241123ca5 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -451,9 +451,6 @@ otherwise noted. exist; this is checked by the VFS. Unlike plain rename, source and target may be of different type. - readlink: called by the readlink(2) system call. Only required if - you want to support reading symbolic links - get_link: called by the VFS to follow a symbolic link to the inode it points to. Only required if you want to support symbolic links. This method returns the symlink body @@ -468,6 +465,12 @@ otherwise noted. argument. If request can't be handled without leaving RCU mode, have it return ERR_PTR(-ECHILD). + readlink: this is now just an override for use by readlink(2) for the + cases when ->get_link uses nd_jump_link() or object is not in + fact a symlink. Normally filesystems should only implement + ->get_link for symlinks and readlink(2) will automatically use + that. + permission: called by the VFS to check for access rights on a POSIX-like filesystem. diff --git a/fs/namei.c b/fs/namei.c index 12a4159de72a..b87465d67c60 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4682,10 +4682,19 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); - if (!inode->i_op->readlink) - return -EINVAL; + if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { + if (unlikely(inode->i_op->readlink)) + return inode->i_op->readlink(dentry, buffer, buflen); + + if (!d_is_symlink(dentry)) + return -EINVAL; + + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_DEFAULT_READLINK; + spin_unlock(&inode->i_lock); + } - return inode->i_op->readlink(dentry, buffer, buflen); + return generic_readlink(dentry, buffer, buflen); } EXPORT_SYMBOL(vfs_readlink); diff --git a/include/linux/fs.h b/include/linux/fs.h index eba20d1c068d..f6c206eae6ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 +#define IOP_DEFAULT_READLINK 0x0010 /* * Keep mostly read-only and often accessed (especially for -- cgit v1.2.3 From d16744ec8ad011793144bb932ce822cc0c1733cb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Dec 2016 16:45:04 +0100 Subject: vfs: make generic_readlink() static Signed-off-by: Miklos Szeredi --- fs/namei.c | 4 ++-- include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/namei.c b/fs/namei.c index c9baf3b3ffd7..c248a9e1edd2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4650,7 +4650,8 @@ out: * have ->get_link() not calling nd_jump_link(). Using (or not using) it * for any given inode is up to filesystem. */ -int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) +static int generic_readlink(struct dentry *dentry, char __user *buffer, + int buflen) { DEFINE_DELAYED_CALL(done); struct inode *inode = d_inode(dentry); @@ -4666,7 +4667,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) do_delayed_call(&done); return res; } -EXPORT_SYMBOL(generic_readlink); /** * vfs_readlink - copy symlink body into userspace buffer diff --git a/include/linux/fs.h b/include/linux/fs.h index f6c206eae6ac..e343d784651a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2915,7 +2915,6 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len, extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); extern int vfs_getattr(struct path *, struct kstat *); -- cgit v1.2.3 From 876bec6f9bbfcb394916d17e35226b086c04dc45 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 9 Dec 2016 16:18:30 -0800 Subject: vfs: refactor clone/dedupe_file_range common functions Hoist both the XFS reflink inode state and preparation code and the XFS file blocks compare functions into the VFS so that ocfs2 can take advantage of it for reflink and dedupe. Signed-off-by: Darrick J. Wong --- fs/read_write.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_reflink.c | 213 +++------------------------------------------------ include/linux/fs.h | 6 ++ 3 files changed, 219 insertions(+), 204 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/read_write.c b/fs/read_write.c index 6674a4b83c54..dbf3f7ffdf3f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1667,6 +1667,114 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) return security_file_permission(file, write ? MAY_WRITE : MAY_READ); } +/* + * Check that the two inodes are eligible for cloning, the ranges make + * sense, and then flush all dirty data. Caller must ensure that the + * inodes have been locked against any other modifications. + */ +int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, + struct inode *inode_out, loff_t pos_out, + u64 *len, bool is_dedupe) +{ + loff_t bs = inode_out->i_sb->s_blocksize; + loff_t blen; + loff_t isize; + bool same_inode = (inode_in == inode_out); + int ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Don't reflink dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) { + *len = 0; + return 0; + } + + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (*len == 0) { + if (is_dedupe) { + *len = 0; + return 0; + } + *len = isize - pos_in; + } + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + *len < pos_in || pos_out + *len < pos_out || + pos_in + *len > isize) + return -EINVAL; + + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; + + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + *len > disize) + return -EINVAL; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + *len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = *len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + return -EINVAL; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode) { + if (pos_out + blen > pos_in && pos_out < pos_in + blen) + return -EINVAL; + } + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + *len - 1); + if (ret) + return ret; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + *len - 1); + if (ret) + return ret; + + /* + * Check that the extents are the same. + */ + if (is_dedupe) { + bool is_same = false; + + ret = vfs_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same); + if (ret) + return ret; + if (!is_same) + return -EBADE; + } + + return 0; +} +EXPORT_SYMBOL(vfs_clone_file_prep_inodes); + int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len) { @@ -1718,6 +1826,102 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_range); +/* + * Read a page's worth of file data into the page cache. Return the page + * locked. + */ +static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) +{ + struct address_space *mapping; + struct page *page; + pgoff_t n; + + n = offset >> PAGE_SHIFT; + mapping = inode->i_mapping; + page = read_mapping_page(mapping, n, NULL); + if (IS_ERR(page)) + return page; + if (!PageUptodate(page)) { + put_page(page); + return ERR_PTR(-EIO); + } + lock_page(page); + return page; +} + +/* + * Compare extents of two files to see if they are the same. + * Caller must have locked both inodes to prevent write races. + */ +int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same) +{ + loff_t src_poff; + loff_t dest_poff; + void *src_addr; + void *dest_addr; + struct page *src_page; + struct page *dest_page; + loff_t cmp_len; + bool same; + int error; + + error = -EINVAL; + same = true; + while (len) { + src_poff = srcoff & (PAGE_SIZE - 1); + dest_poff = destoff & (PAGE_SIZE - 1); + cmp_len = min(PAGE_SIZE - src_poff, + PAGE_SIZE - dest_poff); + cmp_len = min(cmp_len, len); + if (cmp_len <= 0) + goto out_error; + + src_page = vfs_dedupe_get_page(src, srcoff); + if (IS_ERR(src_page)) { + error = PTR_ERR(src_page); + goto out_error; + } + dest_page = vfs_dedupe_get_page(dest, destoff); + if (IS_ERR(dest_page)) { + error = PTR_ERR(dest_page); + unlock_page(src_page); + put_page(src_page); + goto out_error; + } + src_addr = kmap_atomic(src_page); + dest_addr = kmap_atomic(dest_page); + + flush_dcache_page(src_page); + flush_dcache_page(dest_page); + + if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) + same = false; + + kunmap_atomic(dest_addr); + kunmap_atomic(src_addr); + unlock_page(dest_page); + unlock_page(src_page); + put_page(dest_page); + put_page(src_page); + + if (!same) + break; + + srcoff += cmp_len; + destoff += cmp_len; + len -= cmp_len; + } + + *is_same = same; + return 0; + +out_error: + return error; +} +EXPORT_SYMBOL(vfs_dedupe_file_range_compare); + int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) { struct file_dedupe_range_info *info; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a279b4e7f5fe..95d6828967f0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1164,111 +1164,6 @@ err: return error; } -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. - */ -static struct page * -xfs_get_page( - struct inode *inode, - xfs_off_t offset) -{ - struct address_space *mapping; - struct page *page; - pgoff_t n; - - n = offset >> PAGE_SHIFT; - mapping = inode->i_mapping; - page = read_mapping_page(mapping, n, NULL); - if (IS_ERR(page)) - return page; - if (!PageUptodate(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - lock_page(page); - return page; -} - -/* - * Compare extents of two files to see if they are the same. - */ -static int -xfs_compare_extents( - struct inode *src, - xfs_off_t srcoff, - struct inode *dest, - xfs_off_t destoff, - xfs_off_t len, - bool *is_same) -{ - xfs_off_t src_poff; - xfs_off_t dest_poff; - void *src_addr; - void *dest_addr; - struct page *src_page; - struct page *dest_page; - xfs_off_t cmp_len; - bool same; - int error; - - error = -EINVAL; - same = true; - while (len) { - src_poff = srcoff & (PAGE_SIZE - 1); - dest_poff = destoff & (PAGE_SIZE - 1); - cmp_len = min(PAGE_SIZE - src_poff, - PAGE_SIZE - dest_poff); - cmp_len = min(cmp_len, len); - ASSERT(cmp_len > 0); - - trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len, - XFS_I(dest), destoff); - - src_page = xfs_get_page(src, srcoff); - if (IS_ERR(src_page)) { - error = PTR_ERR(src_page); - goto out_error; - } - dest_page = xfs_get_page(dest, destoff); - if (IS_ERR(dest_page)) { - error = PTR_ERR(dest_page); - unlock_page(src_page); - put_page(src_page); - goto out_error; - } - src_addr = kmap_atomic(src_page); - dest_addr = kmap_atomic(dest_page); - - flush_dcache_page(src_page); - flush_dcache_page(dest_page); - - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) - same = false; - - kunmap_atomic(dest_addr); - kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); - put_page(dest_page); - put_page(src_page); - - if (!same) - break; - - srcoff += cmp_len; - destoff += cmp_len; - len -= cmp_len; - } - - *is_same = same; - return 0; - -out_error: - trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_); - return error; -} - /* * Link a range of blocks from one file to another. */ @@ -1286,14 +1181,11 @@ xfs_reflink_remap_range( struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; - loff_t bs = inode_out->i_sb->s_blocksize; bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; xfs_extlen_t cowextsize; - loff_t isize; ssize_t ret; - loff_t blen; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1310,26 +1202,8 @@ xfs_reflink_remap_range( xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); } - /* Don't touch certain kinds of inodes */ - ret = -EPERM; - if (IS_IMMUTABLE(inode_out)) - goto out_unlock; - - ret = -ETXTBSY; - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) - goto out_unlock; - - - /* Don't reflink dirs, pipes, sockets... */ - ret = -EISDIR; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - goto out_unlock; + /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; - if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - goto out_unlock; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - goto out_unlock; - /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; @@ -1338,91 +1212,18 @@ xfs_reflink_remap_range( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - /* Are we going all the way to the end? */ - isize = i_size_read(inode_in); - if (isize == 0) { - ret = 0; - goto out_unlock; - } - - if (len == 0) - len = isize - pos_in; - - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + len < pos_in || pos_out + len < pos_out || - pos_in + len > isize) - goto out_unlock; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + len > disize) - goto out_unlock; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - goto out_unlock; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode) { - if (pos_out + blen > pos_in && pos_out < pos_in + blen) - goto out_unlock; - } - - /* Wait for the completion of any pending IOs on both files */ - inode_dio_wait(inode_in); - if (!same_inode) - inode_dio_wait(inode_out); - - ret = filemap_write_and_wait_range(inode_in->i_mapping, - pos_in, pos_in + len - 1); - if (ret) - goto out_unlock; - - ret = filemap_write_and_wait_range(inode_out->i_mapping, - pos_out, pos_out + len - 1); - if (ret) + ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, + &len, is_dedupe); + if (ret || len == 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - /* - * Check that the extents are the same. - */ - if (is_dedupe) { - bool is_same = false; - - ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, - len, &is_same); - if (ret) - goto out_unlock; - if (!is_same) { - ret = -EBADE; - goto out_unlock; - } - } - + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; - /* - * Invalidate the page cache so that we can clear any CoW mappings - * in the destination file. - */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); @@ -1431,6 +1232,10 @@ xfs_reflink_remap_range( if (ret) goto out_unlock; + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); + /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..caea736fa09c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1778,8 +1778,14 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *, int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, + struct inode *inode_out, loff_t pos_out, + u64 *len, bool is_dedupe); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); +extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -- cgit v1.2.3 From 031a072a0b8ac2646def77aa310a95016c884bb0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 23 Sep 2016 11:38:11 +0300 Subject: vfs: call vfs_clone_file_range() under freeze protection Move sb_start_write()/sb_end_write() out of the vfs helper and up into the ioctl handler. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/ioctl.c | 2 +- fs/nfsd/vfs.c | 3 +-- fs/read_write.c | 3 --- include/linux/fs.h | 13 +++++++++++++ 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/ioctl.c b/fs/ioctl.c index 6715b7208835..cb9b02940805 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -226,7 +226,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ca642fe9b21..357e844aee84 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -509,8 +509,7 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/read_write.c b/fs/read_write.c index 175d30e3b603..c4e206b875d0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1687,8 +1687,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, if (pos_in + len > i_size_read(inode_in)) return -EINVAL; - sb_start_write(inode_out->i_sb); - ret = file_in->f_op->clone_file_range(file_in, pos_in, file_out, pos_out, len); if (!ret) { @@ -1696,7 +1694,6 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, fsnotify_modify(file_out); } - sb_end_write(inode_out->i_sb); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..52663f1f3084 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1783,6 +1783,19 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + sb_start_write(file_inode(file_out)->i_sb); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + sb_end_write(file_inode(file_out)->i_sb); + + return ret; +} + struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); -- cgit v1.2.3