diff options
Diffstat (limited to 'drivers/block/zloop.c')
| -rw-r--r-- | drivers/block/zloop.c | 160 |
1 file changed, 141 insertions, 19 deletions
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index a423228e201b..3f50321aa4a7 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -32,6 +32,8 @@ enum { ZLOOP_OPT_NR_QUEUES = (1 << 6), ZLOOP_OPT_QUEUE_DEPTH = (1 << 7), ZLOOP_OPT_BUFFERED_IO = (1 << 8), + ZLOOP_OPT_ZONE_APPEND = (1 << 9), + ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10), }; static const match_table_t zloop_opt_tokens = { @@ -44,6 +46,8 @@ static const match_table_t zloop_opt_tokens = { { ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" }, { ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" }, { ZLOOP_OPT_BUFFERED_IO, "buffered_io" }, + { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" }, + { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" }, { ZLOOP_OPT_ERR, NULL } }; @@ -56,6 +60,8 @@ static const match_table_t zloop_opt_tokens = { #define ZLOOP_DEF_NR_QUEUES 1 #define ZLOOP_DEF_QUEUE_DEPTH 128 #define ZLOOP_DEF_BUFFERED_IO false +#define ZLOOP_DEF_ZONE_APPEND true +#define ZLOOP_DEF_ORDERED_ZONE_APPEND false /* Arbitrary limit on the zone size (16GB). 
*/ #define ZLOOP_MAX_ZONE_SIZE_MB 16384 @@ -71,6 +77,8 @@ struct zloop_options { unsigned int nr_queues; unsigned int queue_depth; bool buffered_io; + bool zone_append; + bool ordered_zone_append; }; /* @@ -92,6 +100,7 @@ struct zloop_zone { unsigned long flags; struct mutex lock; + spinlock_t wp_lock; enum blk_zone_cond cond; sector_t start; sector_t wp; @@ -108,6 +117,8 @@ struct zloop_device { struct workqueue_struct *workqueue; bool buffered_io; + bool zone_append; + bool ordered_zone_append; const char *base_dir; struct file *data_dir; @@ -147,6 +158,7 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) struct zloop_zone *zone = &zlo->zones[zone_no]; struct kstat stat; sector_t file_sectors; + unsigned long flags; int ret; lockdep_assert_held(&zone->lock); @@ -172,16 +184,18 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) return -EINVAL; } + spin_lock_irqsave(&zone->wp_lock, flags); if (!file_sectors) { zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; } else if (file_sectors == zlo->zone_capacity) { zone->cond = BLK_ZONE_COND_FULL; - zone->wp = zone->start + zlo->zone_size; + zone->wp = ULLONG_MAX; } else { zone->cond = BLK_ZONE_COND_CLOSED; zone->wp = zone->start + file_sectors; } + spin_unlock_irqrestore(&zone->wp_lock, flags); return 0; } @@ -225,6 +239,7 @@ unlock: static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; + unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -243,10 +258,12 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: + spin_lock_irqsave(&zone->wp_lock, flags); if (zone->wp == zone->start) zone->cond = BLK_ZONE_COND_EMPTY; else zone->cond = BLK_ZONE_COND_CLOSED; + spin_unlock_irqrestore(&zone->wp_lock, flags); break; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_FULL: @@ 
-264,6 +281,7 @@ unlock: static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; + unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -281,9 +299,11 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) goto unlock; } + spin_lock_irqsave(&zone->wp_lock, flags); zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + spin_unlock_irqrestore(&zone->wp_lock, flags); unlock: mutex_unlock(&zone->lock); @@ -308,6 +328,7 @@ static int zloop_reset_all_zones(struct zloop_device *zlo) static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; + unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -325,9 +346,11 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) goto unlock; } + spin_lock_irqsave(&zone->wp_lock, flags); zone->cond = BLK_ZONE_COND_FULL; - zone->wp = zone->start + zlo->zone_size; + zone->wp = ULLONG_MAX; clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); + spin_unlock_irqrestore(&zone->wp_lock, flags); unlock: mutex_unlock(&zone->lock); @@ -369,6 +392,7 @@ static void zloop_rw(struct zloop_cmd *cmd) struct zloop_zone *zone; struct iov_iter iter; struct bio_vec tmp; + unsigned long flags; sector_t zone_end; int nr_bvec = 0; int ret; @@ -378,6 +402,11 @@ static void zloop_rw(struct zloop_cmd *cmd) cmd->nr_sectors = nr_sectors; cmd->ret = 0; + if (WARN_ON_ONCE(is_append && !zlo->zone_append)) { + ret = -EIO; + goto out; + } + /* We should never get an I/O beyond the device capacity. 
*/ if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) { ret = -EIO; @@ -406,16 +435,31 @@ static void zloop_rw(struct zloop_cmd *cmd) if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) { mutex_lock(&zone->lock); - if (is_append) { - sector = zone->wp; - cmd->sector = sector; - } + spin_lock_irqsave(&zone->wp_lock, flags); /* - * Write operations must be aligned to the write pointer and - * fully contained within the zone capacity. + * Zone append operations always go at the current write + * pointer, but regular write operations must already be + * aligned to the write pointer when submitted. */ - if (sector != zone->wp || zone->wp + nr_sectors > zone_end) { + if (is_append) { + /* + * If ordered zone append is in use, we already checked + * and set the target sector in zloop_queue_rq(). + */ + if (!zlo->ordered_zone_append) { + if (zone->cond == BLK_ZONE_COND_FULL || + zone->wp + nr_sectors > zone_end) { + spin_unlock_irqrestore(&zone->wp_lock, + flags); + ret = -EIO; + goto unlock; + } + sector = zone->wp; + } + cmd->sector = sector; + } else if (sector != zone->wp) { + spin_unlock_irqrestore(&zone->wp_lock, flags); pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n", zone_no, sector, zone->wp); ret = -EIO; @@ -428,13 +472,19 @@ static void zloop_rw(struct zloop_cmd *cmd) zone->cond = BLK_ZONE_COND_IMP_OPEN; /* - * Advance the write pointer of sequential zones. If the write - * fails, the wp position will be corrected when the next I/O - * copmpletes. + * Advance the write pointer, unless ordered zone append is in + * use. If the write fails, the write pointer position will be + * corrected when the next I/O starts execution. 
*/ - zone->wp += nr_sectors; - if (zone->wp == zone_end) - zone->cond = BLK_ZONE_COND_FULL; + if (!is_append || !zlo->ordered_zone_append) { + zone->wp += nr_sectors; + if (zone->wp == zone_end) { + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = ULLONG_MAX; + } + } + + spin_unlock_irqrestore(&zone->wp_lock, flags); } rq_for_each_bvec(tmp, rq, rq_iter) @@ -498,6 +548,10 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd) struct request *rq = blk_mq_rq_from_pdu(cmd); struct zloop_device *zlo = rq->q->queuedata; + /* We can block in this context, so ignore REQ_NOWAIT. */ + if (rq->cmd_flags & REQ_NOWAIT) + rq->cmd_flags &= ~REQ_NOWAIT; + switch (req_op(rq)) { case REQ_OP_READ: case REQ_OP_WRITE: @@ -608,6 +662,35 @@ static void zloop_complete_rq(struct request *rq) blk_mq_end_request(rq, sts); } +static bool zloop_set_zone_append_sector(struct request *rq) +{ + struct zloop_device *zlo = rq->q->queuedata; + unsigned int zone_no = rq_zone_no(rq); + struct zloop_zone *zone = &zlo->zones[zone_no]; + sector_t zone_end = zone->start + zlo->zone_capacity; + sector_t nr_sectors = blk_rq_sectors(rq); + unsigned long flags; + + spin_lock_irqsave(&zone->wp_lock, flags); + + if (zone->cond == BLK_ZONE_COND_FULL || + zone->wp + nr_sectors > zone_end) { + spin_unlock_irqrestore(&zone->wp_lock, flags); + return false; + } + + rq->__sector = zone->wp; + zone->wp += blk_rq_sectors(rq); + if (zone->wp >= zone_end) { + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = ULLONG_MAX; + } + + spin_unlock_irqrestore(&zone->wp_lock, flags); + + return true; +} + static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -618,6 +701,16 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, if (zlo->state == Zlo_deleting) return BLK_STS_IOERR; + /* + * If we need to strongly order zone append operations, set the request + * sector to the zone write pointer location now instead of when the + * command work runs. 
+ */ + if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) { + if (!zloop_set_zone_append_sector(rq)) + return BLK_STS_IOERR; + } + blk_mq_start_request(rq); INIT_WORK(&cmd->work, zloop_cmd_workfn); @@ -647,11 +740,12 @@ static int zloop_open(struct gendisk *disk, blk_mode_t mode) } static int zloop_report_zones(struct gendisk *disk, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data) + unsigned int nr_zones, struct blk_report_zones_args *args) { struct zloop_device *zlo = disk->private_data; struct blk_zone blkz = {}; unsigned int first, i; + unsigned long flags; int ret; first = disk_zone_no(disk, sector); @@ -675,7 +769,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector, blkz.start = zone->start; blkz.len = zlo->zone_size; + spin_lock_irqsave(&zone->wp_lock, flags); blkz.wp = zone->wp; + spin_unlock_irqrestore(&zone->wp_lock, flags); blkz.cond = zone->cond; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { blkz.type = BLK_ZONE_TYPE_CONVENTIONAL; @@ -687,7 +783,7 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector, mutex_unlock(&zone->lock); - ret = cb(&blkz, i, data); + ret = disk_report_zone(disk, &blkz, i, args); if (ret) return ret; } @@ -783,6 +879,7 @@ static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts, int ret; mutex_init(&zone->lock); + spin_lock_init(&zone->wp_lock); zone->start = (sector_t)zone_no << zlo->zone_shift; if (!restore) @@ -884,7 +981,6 @@ static int zloop_ctl_add(struct zloop_options *opts) { struct queue_limits lim = { .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, - .max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT, .chunk_sectors = opts->zone_size, .features = BLK_FEAT_ZONED, }; @@ -936,6 +1032,9 @@ static int zloop_ctl_add(struct zloop_options *opts) zlo->nr_zones = nr_zones; zlo->nr_conv_zones = opts->nr_conv_zones; zlo->buffered_io = opts->buffered_io; + zlo->zone_append = opts->zone_append; + if (zlo->zone_append) + 
zlo->ordered_zone_append = opts->ordered_zone_append; zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE, opts->nr_queues * opts->queue_depth, zlo->id); @@ -976,6 +1075,8 @@ static int zloop_ctl_add(struct zloop_options *opts) lim.physical_block_size = zlo->block_size; lim.logical_block_size = zlo->block_size; + if (zlo->zone_append) + lim.max_hw_zone_append_sectors = lim.max_hw_sectors; zlo->tag_set.ops = &zloop_mq_ops; zlo->tag_set.nr_hw_queues = opts->nr_queues; @@ -1016,10 +1117,14 @@ static int zloop_ctl_add(struct zloop_options *opts) zlo->state = Zlo_live; mutex_unlock(&zloop_ctl_mutex); - pr_info("Added device %d: %u zones of %llu MB, %u B block size\n", + pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n", zlo->id, zlo->nr_zones, ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20, zlo->block_size); + pr_info("zloop%d: using %s%s zone append\n", + zlo->id, + zlo->ordered_zone_append ? "ordered " : "", + zlo->zone_append ? "native" : "emulated"); return 0; @@ -1106,6 +1211,8 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf) opts->nr_queues = ZLOOP_DEF_NR_QUEUES; opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH; opts->buffered_io = ZLOOP_DEF_BUFFERED_IO; + opts->zone_append = ZLOOP_DEF_ZONE_APPEND; + opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND; if (!buf) return 0; @@ -1215,6 +1322,21 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf) case ZLOOP_OPT_BUFFERED_IO: opts->buffered_io = true; break; + case ZLOOP_OPT_ZONE_APPEND: + if (match_uint(args, &token)) { + ret = -EINVAL; + goto out; + } + if (token != 0 && token != 1) { + pr_err("Invalid zone_append value\n"); + ret = -EINVAL; + goto out; + } + opts->zone_append = token; + break; + case ZLOOP_OPT_ORDERED_ZONE_APPEND: + opts->ordered_zone_append = true; + break; case ZLOOP_OPT_ERR: default: pr_warn("unknown parameter or missing value '%s'\n", p); |
