1 files changed, 141 insertions, 19 deletions
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index a423228e201b..3f50321aa4a7 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -32,6 +32,8 @@ enum {
 	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
 	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
 	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
+	ZLOOP_OPT_ZONE_APPEND		= (1 << 9),
+	ZLOOP_OPT_ORDERED_ZONE_APPEND	= (1 << 10),
 };
 
 static const match_table_t zloop_opt_tokens = {
@@ -44,6 +46,8 @@ static const match_table_t zloop_opt_tokens = {
 	{ ZLOOP_OPT_NR_QUEUES,		"nr_queues=%u"		},
 	{ ZLOOP_OPT_QUEUE_DEPTH,	"queue_depth=%u"	},
 	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
+	{ ZLOOP_OPT_ZONE_APPEND,	"zone_append=%u"	},
+	{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append"	},
 	{ ZLOOP_OPT_ERR,		NULL			}
 };
 
@@ -56,6 +60,8 @@ static const match_table_t zloop_opt_tokens = {
 #define ZLOOP_DEF_NR_QUEUES		1
 #define ZLOOP_DEF_QUEUE_DEPTH		128
 #define ZLOOP_DEF_BUFFERED_IO		false
+#define ZLOOP_DEF_ZONE_APPEND		true
+#define ZLOOP_DEF_ORDERED_ZONE_APPEND	false
 
 /* Arbitrary limit on the zone size (16GB). */
 #define ZLOOP_MAX_ZONE_SIZE_MB		16384
@@ -71,6 +77,8 @@ struct zloop_options {
 	unsigned int		nr_queues;
 	unsigned int		queue_depth;
 	bool			buffered_io;
+	bool			zone_append;
+	bool			ordered_zone_append;
 };
 
 /*
@@ -92,6 +100,7 @@ struct zloop_zone {
 
 	unsigned long		flags;
 	struct mutex		lock;
+	spinlock_t		wp_lock;
 	enum blk_zone_cond	cond;
 	sector_t		start;
 	sector_t		wp;
@@ -108,6 +117,8 @@ struct zloop_device {
 
 	struct workqueue_struct *workqueue;
 	bool			buffered_io;
+	bool			zone_append;
+	bool			ordered_zone_append;
 
 	const char		*base_dir;
 	struct file		*data_dir;
@@ -147,6 +158,7 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
 	struct zloop_zone *zone = &zlo->zones[zone_no];
 	struct kstat stat;
 	sector_t file_sectors;
+	unsigned long flags;
 	int ret;
 
 	lockdep_assert_held(&zone->lock);
@@ -172,16 +184,18 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
 		return -EINVAL;
 	}
 
+	spin_lock_irqsave(&zone->wp_lock, flags);
 	if (!file_sectors) {
 		zone->cond = BLK_ZONE_COND_EMPTY;
 		zone->wp = zone->start;
 	} else if (file_sectors == zlo->zone_capacity) {
 		zone->cond = BLK_ZONE_COND_FULL;
-		zone->wp = zone->start + zlo->zone_size;
+		zone->wp = ULLONG_MAX;
 	} else {
 		zone->cond = BLK_ZONE_COND_CLOSED;
 		zone->wp = zone->start + file_sectors;
 	}
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
 
 	return 0;
 }
@@ -225,6 +239,7 @@ unlock:
 static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
 {
 	struct zloop_zone *zone = &zlo->zones[zone_no];
+	unsigned long flags;
 	int ret = 0;
 
 	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -243,10 +258,12 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
 		break;
 	case BLK_ZONE_COND_IMP_OPEN:
 	case BLK_ZONE_COND_EXP_OPEN:
+		spin_lock_irqsave(&zone->wp_lock, flags);
 		if (zone->wp == zone->start)
 			zone->cond = BLK_ZONE_COND_EMPTY;
 		else
 			zone->cond = BLK_ZONE_COND_CLOSED;
+		spin_unlock_irqrestore(&zone->wp_lock, flags);
 		break;
 	case BLK_ZONE_COND_EMPTY:
 	case BLK_ZONE_COND_FULL:
@@ -264,6 +281,7 @@ unlock:
 static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
 {
 	struct zloop_zone *zone = &zlo->zones[zone_no];
+	unsigned long flags;
 	int ret = 0;
 
 	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -281,9 +299,11 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
 		goto unlock;
 	}
 
+	spin_lock_irqsave(&zone->wp_lock, flags);
 	zone->cond = BLK_ZONE_COND_EMPTY;
 	zone->wp = zone->start;
 	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
 
 unlock:
 	mutex_unlock(&zone->lock);
@@ -308,6 +328,7 @@ static int zloop_reset_all_zones(struct zloop_device *zlo)
 static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
 {
 	struct zloop_zone *zone = &zlo->zones[zone_no];
+	unsigned long flags;
 	int ret = 0;
 
 	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -325,9 +346,11 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
 		goto unlock;
 	}
 
+	spin_lock_irqsave(&zone->wp_lock, flags);
 	zone->cond = BLK_ZONE_COND_FULL;
-	zone->wp = zone->start + zlo->zone_size;
+	zone->wp = ULLONG_MAX;
 	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
 
  unlock:
 	mutex_unlock(&zone->lock);
@@ -369,6 +392,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	struct zloop_zone *zone;
 	struct iov_iter iter;
 	struct bio_vec tmp;
+	unsigned long flags;
 	sector_t zone_end;
 	int nr_bvec = 0;
 	int ret;
@@ -378,6 +402,11 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	cmd->nr_sectors = nr_sectors;
 	cmd->ret = 0;
 
+	if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
+		ret = -EIO;
+		goto out;
+	}
+
 	/* We should never get an I/O beyond the device capacity. */
 	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
 		ret = -EIO;
@@ -406,16 +435,31 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
 		mutex_lock(&zone->lock);
 
-		if (is_append) {
-			sector = zone->wp;
-			cmd->sector = sector;
-		}
+		spin_lock_irqsave(&zone->wp_lock, flags);
 
 		/*
-		 * Write operations must be aligned to the write pointer and
-		 * fully contained within the zone capacity.
+		 * Zone append operations always go at the current write
+		 * pointer, but regular write operations must already be
+		 * aligned to the write pointer when submitted.
 		 */
-		if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
+		if (is_append) {
+			/*
+			 * If ordered zone append is in use, we already checked
+			 * and set the target sector in zloop_queue_rq().
+			 */
+			if (!zlo->ordered_zone_append) {
+				if (zone->cond == BLK_ZONE_COND_FULL ||
+				    zone->wp + nr_sectors > zone_end) {
+					spin_unlock_irqrestore(&zone->wp_lock,
+							       flags);
+					ret = -EIO;
+					goto unlock;
+				}
+				sector = zone->wp;
+			}
+			cmd->sector = sector;
+		} else if (sector != zone->wp) {
+			spin_unlock_irqrestore(&zone->wp_lock, flags);
 			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
 			       zone_no, sector, zone->wp);
 			ret = -EIO;
@@ -428,13 +472,19 @@ static void zloop_rw(struct zloop_cmd *cmd)
 			zone->cond = BLK_ZONE_COND_IMP_OPEN;
 
 		/*
-		 * Advance the write pointer of sequential zones. If the write
-		 * fails, the wp position will be corrected when the next I/O
-		 * copmpletes.
+		 * Advance the write pointer, unless ordered zone append is in
+		 * use. If the write fails, the write pointer position will be
+		 * corrected when the next I/O starts execution.
 		 */
-		zone->wp += nr_sectors;
-		if (zone->wp == zone_end)
-			zone->cond = BLK_ZONE_COND_FULL;
+		if (!is_append || !zlo->ordered_zone_append) {
+			zone->wp += nr_sectors;
+			if (zone->wp == zone_end) {
+				zone->cond = BLK_ZONE_COND_FULL;
+				zone->wp = ULLONG_MAX;
+			}
+		}
+
+		spin_unlock_irqrestore(&zone->wp_lock, flags);
 	}
 
 	rq_for_each_bvec(tmp, rq, rq_iter)
@@ -498,6 +548,10 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd)
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct zloop_device *zlo = rq->q->queuedata;
 
+	/* We can block in this context, so ignore REQ_NOWAIT. */
+	if (rq->cmd_flags & REQ_NOWAIT)
+		rq->cmd_flags &= ~REQ_NOWAIT;
+
 	switch (req_op(rq)) {
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
@@ -608,6 +662,35 @@ static void zloop_complete_rq(struct request *rq)
 	blk_mq_end_request(rq, sts);
 }
 
+static bool zloop_set_zone_append_sector(struct request *rq)
+{
+	struct zloop_device *zlo = rq->q->queuedata;
+	unsigned int zone_no = rq_zone_no(rq);
+	struct zloop_zone *zone = &zlo->zones[zone_no];
+	sector_t zone_end = zone->start + zlo->zone_capacity;
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->wp_lock, flags);
+
+	if (zone->cond == BLK_ZONE_COND_FULL ||
+	    zone->wp + nr_sectors > zone_end) {
+		spin_unlock_irqrestore(&zone->wp_lock, flags);
+		return false;
+	}
+
+	rq->__sector = zone->wp;
+	zone->wp += blk_rq_sectors(rq);
+	if (zone->wp >= zone_end) {
+		zone->cond = BLK_ZONE_COND_FULL;
+		zone->wp = ULLONG_MAX;
+	}
+
+	spin_unlock_irqrestore(&zone->wp_lock, flags);
+
+	return true;
+}
+
 static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
 				   const struct blk_mq_queue_data *bd)
 {
@@ -618,6 +701,16 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (zlo->state == Zlo_deleting)
 		return BLK_STS_IOERR;
 
+	/*
+	 * If we need to strongly order zone append operations, set the request
+	 * sector to the zone write pointer location now instead of when the
+	 * command work runs.
+	 */
+	if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
+		if (!zloop_set_zone_append_sector(rq))
+			return BLK_STS_IOERR;
+	}
+
 	blk_mq_start_request(rq);
 
 	INIT_WORK(&cmd->work, zloop_cmd_workfn);
@@ -647,11 +740,12 @@ static int zloop_open(struct gendisk *disk, blk_mode_t mode)
 }
 
 static int zloop_report_zones(struct gendisk *disk, sector_t sector,
-		unsigned int nr_zones, report_zones_cb cb, void *data)
+		unsigned int nr_zones, struct blk_report_zones_args *args)
 {
 	struct zloop_device *zlo = disk->private_data;
 	struct blk_zone blkz = {};
 	unsigned int first, i;
+	unsigned long flags;
 	int ret;
 
 	first = disk_zone_no(disk, sector);
@@ -675,7 +769,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
 
 		blkz.start = zone->start;
 		blkz.len = zlo->zone_size;
+		spin_lock_irqsave(&zone->wp_lock, flags);
 		blkz.wp = zone->wp;
+		spin_unlock_irqrestore(&zone->wp_lock, flags);
 		blkz.cond = zone->cond;
 		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
 			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
@@ -687,7 +783,7 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
 
 		mutex_unlock(&zone->lock);
 
-		ret = cb(&blkz, i, data);
+		ret = disk_report_zone(disk, &blkz, i, args);
 		if (ret)
 			return ret;
 	}
@@ -783,6 +879,7 @@ static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
 	int ret;
 
 	mutex_init(&zone->lock);
+	spin_lock_init(&zone->wp_lock);
 	zone->start = (sector_t)zone_no << zlo->zone_shift;
 
 	if (!restore)
@@ -884,7 +981,6 @@ static int zloop_ctl_add(struct zloop_options *opts)
 {
 	struct queue_limits lim = {
 		.max_hw_sectors		= SZ_1M >> SECTOR_SHIFT,
-		.max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT,
 		.chunk_sectors		= opts->zone_size,
 		.features		= BLK_FEAT_ZONED,
 	};
@@ -936,6 +1032,9 @@ static int zloop_ctl_add(struct zloop_options *opts)
 	zlo->nr_zones = nr_zones;
 	zlo->nr_conv_zones = opts->nr_conv_zones;
 	zlo->buffered_io = opts->buffered_io;
+	zlo->zone_append = opts->zone_append;
+	if (zlo->zone_append)
+		zlo->ordered_zone_append = opts->ordered_zone_append;
 
 	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
 				opts->nr_queues * opts->queue_depth, zlo->id);
@@ -976,6 +1075,8 @@ static int zloop_ctl_add(struct zloop_options *opts)
 
 	lim.physical_block_size = zlo->block_size;
 	lim.logical_block_size = zlo->block_size;
+	if (zlo->zone_append)
+		lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
 
 	zlo->tag_set.ops = &zloop_mq_ops;
 	zlo->tag_set.nr_hw_queues = opts->nr_queues;
@@ -1016,10 +1117,14 @@ static int zloop_ctl_add(struct zloop_options *opts)
 	zlo->state = Zlo_live;
 	mutex_unlock(&zloop_ctl_mutex);
 
-	pr_info("Added device %d: %u zones of %llu MB, %u B block size\n",
+	pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
 		zlo->id, zlo->nr_zones,
 		((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
 		zlo->block_size);
+	pr_info("zloop%d: using %s%s zone append\n",
+		zlo->id,
+		zlo->ordered_zone_append ? "ordered " : "",
+		zlo->zone_append ? "native" : "emulated");
 
 	return 0;
 
@@ -1106,6 +1211,8 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
 	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
 	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
 	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
+	opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
+	opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;
 
 	if (!buf)
 		return 0;
@@ -1215,6 +1322,21 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
 		case ZLOOP_OPT_BUFFERED_IO:
 			opts->buffered_io = true;
 			break;
+		case ZLOOP_OPT_ZONE_APPEND:
+			if (match_uint(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			if (token != 0 && token != 1) {
+				pr_err("Invalid zone_append value\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			opts->zone_append = token;
+			break;
+		case ZLOOP_OPT_ORDERED_ZONE_APPEND:
+			opts->ordered_zone_append = true;
+			break;
 		case ZLOOP_OPT_ERR:
 		default:
 			pr_warn("unknown parameter or missing value '%s'\n", p);