summaryrefslogtreecommitdiff
path: root/drivers/block/zloop.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/zloop.c')
-rw-r--r--drivers/block/zloop.c160
1 files changed, 141 insertions, 19 deletions
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index a423228e201b..3f50321aa4a7 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -32,6 +32,8 @@ enum {
ZLOOP_OPT_NR_QUEUES = (1 << 6),
ZLOOP_OPT_QUEUE_DEPTH = (1 << 7),
ZLOOP_OPT_BUFFERED_IO = (1 << 8),
+ ZLOOP_OPT_ZONE_APPEND = (1 << 9),
+ ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10),
};
static const match_table_t zloop_opt_tokens = {
@@ -44,6 +46,8 @@ static const match_table_t zloop_opt_tokens = {
{ ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
{ ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
{ ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
+ { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
+ { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
{ ZLOOP_OPT_ERR, NULL }
};
@@ -56,6 +60,8 @@ static const match_table_t zloop_opt_tokens = {
#define ZLOOP_DEF_NR_QUEUES 1
#define ZLOOP_DEF_QUEUE_DEPTH 128
#define ZLOOP_DEF_BUFFERED_IO false
+#define ZLOOP_DEF_ZONE_APPEND true
+#define ZLOOP_DEF_ORDERED_ZONE_APPEND false
/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB 16384
@@ -71,6 +77,8 @@ struct zloop_options {
unsigned int nr_queues;
unsigned int queue_depth;
bool buffered_io;
+ bool zone_append;
+ bool ordered_zone_append;
};
/*
@@ -92,6 +100,7 @@ struct zloop_zone {
unsigned long flags;
struct mutex lock;
+ spinlock_t wp_lock;
enum blk_zone_cond cond;
sector_t start;
sector_t wp;
@@ -108,6 +117,8 @@ struct zloop_device {
struct workqueue_struct *workqueue;
bool buffered_io;
+ bool zone_append;
+ bool ordered_zone_append;
const char *base_dir;
struct file *data_dir;
@@ -147,6 +158,7 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
struct zloop_zone *zone = &zlo->zones[zone_no];
struct kstat stat;
sector_t file_sectors;
+ unsigned long flags;
int ret;
lockdep_assert_held(&zone->lock);
@@ -172,16 +184,18 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
return -EINVAL;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
if (!file_sectors) {
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
} else if (file_sectors == zlo->zone_capacity) {
zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = zone->start + zlo->zone_size;
+ zone->wp = ULLONG_MAX;
} else {
zone->cond = BLK_ZONE_COND_CLOSED;
zone->wp = zone->start + file_sectors;
}
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
return 0;
}
@@ -225,6 +239,7 @@ unlock:
static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -243,10 +258,12 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
+ spin_lock_irqsave(&zone->wp_lock, flags);
if (zone->wp == zone->start)
zone->cond = BLK_ZONE_COND_EMPTY;
else
zone->cond = BLK_ZONE_COND_CLOSED;
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
break;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_FULL:
@@ -264,6 +281,7 @@ unlock:
static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -281,9 +299,11 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
unlock:
mutex_unlock(&zone->lock);
@@ -308,6 +328,7 @@ static int zloop_reset_all_zones(struct zloop_device *zlo)
static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -325,9 +346,11 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = zone->start + zlo->zone_size;
+ zone->wp = ULLONG_MAX;
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
unlock:
mutex_unlock(&zone->lock);
@@ -369,6 +392,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
struct zloop_zone *zone;
struct iov_iter iter;
struct bio_vec tmp;
+ unsigned long flags;
sector_t zone_end;
int nr_bvec = 0;
int ret;
@@ -378,6 +402,11 @@ static void zloop_rw(struct zloop_cmd *cmd)
cmd->nr_sectors = nr_sectors;
cmd->ret = 0;
+ if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
+ ret = -EIO;
+ goto out;
+ }
+
/* We should never get an I/O beyond the device capacity. */
if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
ret = -EIO;
@@ -406,16 +435,31 @@ static void zloop_rw(struct zloop_cmd *cmd)
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
mutex_lock(&zone->lock);
- if (is_append) {
- sector = zone->wp;
- cmd->sector = sector;
- }
+ spin_lock_irqsave(&zone->wp_lock, flags);
/*
- * Write operations must be aligned to the write pointer and
- * fully contained within the zone capacity.
+ * Zone append operations always go at the current write
+ * pointer, but regular write operations must already be
+ * aligned to the write pointer when submitted.
*/
- if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
+ if (is_append) {
+ /*
+ * If ordered zone append is in use, we already checked
+ * and set the target sector in zloop_queue_rq().
+ */
+ if (!zlo->ordered_zone_append) {
+ if (zone->cond == BLK_ZONE_COND_FULL ||
+ zone->wp + nr_sectors > zone_end) {
+ spin_unlock_irqrestore(&zone->wp_lock,
+ flags);
+ ret = -EIO;
+ goto unlock;
+ }
+ sector = zone->wp;
+ }
+ cmd->sector = sector;
+ } else if (sector != zone->wp) {
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
zone_no, sector, zone->wp);
ret = -EIO;
@@ -428,13 +472,19 @@ static void zloop_rw(struct zloop_cmd *cmd)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
/*
- * Advance the write pointer of sequential zones. If the write
- * fails, the wp position will be corrected when the next I/O
- * copmpletes.
+ * Advance the write pointer, unless ordered zone append is in
+ * use. If the write fails, the write pointer position will be
+ * corrected when the next I/O starts execution.
*/
- zone->wp += nr_sectors;
- if (zone->wp == zone_end)
- zone->cond = BLK_ZONE_COND_FULL;
+ if (!is_append || !zlo->ordered_zone_append) {
+ zone->wp += nr_sectors;
+ if (zone->wp == zone_end) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = ULLONG_MAX;
+ }
+ }
+
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
}
rq_for_each_bvec(tmp, rq, rq_iter)
@@ -498,6 +548,10 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd)
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct zloop_device *zlo = rq->q->queuedata;
+ /* We can block in this context, so ignore REQ_NOWAIT. */
+ if (rq->cmd_flags & REQ_NOWAIT)
+ rq->cmd_flags &= ~REQ_NOWAIT;
+
switch (req_op(rq)) {
case REQ_OP_READ:
case REQ_OP_WRITE:
@@ -608,6 +662,35 @@ static void zloop_complete_rq(struct request *rq)
blk_mq_end_request(rq, sts);
}
+static bool zloop_set_zone_append_sector(struct request *rq)
+{
+ struct zloop_device *zlo = rq->q->queuedata;
+ unsigned int zone_no = rq_zone_no(rq);
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ sector_t zone_end = zone->start + zlo->zone_capacity;
+ sector_t nr_sectors = blk_rq_sectors(rq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->wp_lock, flags);
+
+ if (zone->cond == BLK_ZONE_COND_FULL ||
+ zone->wp + nr_sectors > zone_end) {
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
+ return false;
+ }
+
+ rq->__sector = zone->wp;
+ zone->wp += blk_rq_sectors(rq);
+ if (zone->wp >= zone_end) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = ULLONG_MAX;
+ }
+
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
+
+ return true;
+}
+
static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -618,6 +701,16 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (zlo->state == Zlo_deleting)
return BLK_STS_IOERR;
+ /*
+ * If we need to strongly order zone append operations, set the request
+ * sector to the zone write pointer location now instead of when the
+ * command work runs.
+ */
+ if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
+ if (!zloop_set_zone_append_sector(rq))
+ return BLK_STS_IOERR;
+ }
+
blk_mq_start_request(rq);
INIT_WORK(&cmd->work, zloop_cmd_workfn);
@@ -647,11 +740,12 @@ static int zloop_open(struct gendisk *disk, blk_mode_t mode)
}
static int zloop_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data)
+ unsigned int nr_zones, struct blk_report_zones_args *args)
{
struct zloop_device *zlo = disk->private_data;
struct blk_zone blkz = {};
unsigned int first, i;
+ unsigned long flags;
int ret;
first = disk_zone_no(disk, sector);
@@ -675,7 +769,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
blkz.start = zone->start;
blkz.len = zlo->zone_size;
+ spin_lock_irqsave(&zone->wp_lock, flags);
blkz.wp = zone->wp;
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
blkz.cond = zone->cond;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
@@ -687,7 +783,7 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
mutex_unlock(&zone->lock);
- ret = cb(&blkz, i, data);
+ ret = disk_report_zone(disk, &blkz, i, args);
if (ret)
return ret;
}
@@ -783,6 +879,7 @@ static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
int ret;
mutex_init(&zone->lock);
+ spin_lock_init(&zone->wp_lock);
zone->start = (sector_t)zone_no << zlo->zone_shift;
if (!restore)
@@ -884,7 +981,6 @@ static int zloop_ctl_add(struct zloop_options *opts)
{
struct queue_limits lim = {
.max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
- .max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT,
.chunk_sectors = opts->zone_size,
.features = BLK_FEAT_ZONED,
};
@@ -936,6 +1032,9 @@ static int zloop_ctl_add(struct zloop_options *opts)
zlo->nr_zones = nr_zones;
zlo->nr_conv_zones = opts->nr_conv_zones;
zlo->buffered_io = opts->buffered_io;
+ zlo->zone_append = opts->zone_append;
+ if (zlo->zone_append)
+ zlo->ordered_zone_append = opts->ordered_zone_append;
zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
opts->nr_queues * opts->queue_depth, zlo->id);
@@ -976,6 +1075,8 @@ static int zloop_ctl_add(struct zloop_options *opts)
lim.physical_block_size = zlo->block_size;
lim.logical_block_size = zlo->block_size;
+ if (zlo->zone_append)
+ lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
zlo->tag_set.ops = &zloop_mq_ops;
zlo->tag_set.nr_hw_queues = opts->nr_queues;
@@ -1016,10 +1117,14 @@ static int zloop_ctl_add(struct zloop_options *opts)
zlo->state = Zlo_live;
mutex_unlock(&zloop_ctl_mutex);
- pr_info("Added device %d: %u zones of %llu MB, %u B block size\n",
+ pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
zlo->id, zlo->nr_zones,
((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
zlo->block_size);
+ pr_info("zloop%d: using %s%s zone append\n",
+ zlo->id,
+ zlo->ordered_zone_append ? "ordered " : "",
+ zlo->zone_append ? "native" : "emulated");
return 0;
@@ -1106,6 +1211,8 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
+ opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
+ opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;
if (!buf)
return 0;
@@ -1215,6 +1322,21 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
case ZLOOP_OPT_BUFFERED_IO:
opts->buffered_io = true;
break;
+ case ZLOOP_OPT_ZONE_APPEND:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token != 0 && token != 1) {
+ pr_err("Invalid zone_append value\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->zone_append = token;
+ break;
+ case ZLOOP_OPT_ORDERED_ZONE_APPEND:
+ opts->ordered_zone_append = true;
+ break;
case ZLOOP_OPT_ERR:
default:
pr_warn("unknown parameter or missing value '%s'\n", p);