summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c10
-rw-r--r--drivers/block/drbd/drbd_receiver.c14
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c4
-rw-r--r--drivers/block/nbd.c44
-rw-r--r--drivers/block/null_blk/main.c81
-rw-r--r--drivers/block/null_blk/null_blk.h3
-rw-r--r--drivers/block/null_blk/zoned.c6
-rw-r--r--drivers/block/ps3disk.c4
-rw-r--r--drivers/block/rnbd/rnbd-proto.h15
-rw-r--r--drivers/block/rnull/configfs.rs9
-rw-r--r--drivers/block/rnull/rnull.rs3
-rw-r--r--drivers/block/ublk_drv.c407
-rw-r--r--drivers/block/virtio_blk.c24
-rw-r--r--drivers/block/zloop.c160
15 files changed, 461 insertions, 325 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 85ca000a0564..d90fa3e7f4cf 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1210,7 +1210,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
return err;
}
-/**
+/*
* drbd_bm_read() - Read the whole bitmap from its on disk location.
* @device: DRBD device.
*/
@@ -1221,7 +1221,7 @@ int drbd_bm_read(struct drbd_device *device,
return bm_rw(device, BM_AIO_READ, 0);
}
-/**
+/*
* drbd_bm_write() - Write the whole bitmap to its on disk location.
* @device: DRBD device.
*
@@ -1233,7 +1233,7 @@ int drbd_bm_write(struct drbd_device *device,
return bm_rw(device, 0, 0);
}
-/**
+/*
* drbd_bm_write_all() - Write the whole bitmap to its on disk location.
* @device: DRBD device.
*
@@ -1255,7 +1255,7 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho
return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
}
-/**
+/*
* drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
* @device: DRBD device.
*
@@ -1272,7 +1272,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device,
return bm_rw(device, BM_AIO_COPY_PAGES, 0);
}
-/**
+/*
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
* @device: DRBD device.
*/
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index caaf2781136d..3de919b6f0e1 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -450,7 +450,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
* a free one dynamically.
*/
what = "bind before connect";
- err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
+ err = sock->ops->bind(sock, (struct sockaddr_unsized *) &src_in6, my_addr_len);
if (err < 0)
goto out;
@@ -458,7 +458,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
* stay C_WF_CONNECTION, don't go Disconnecting! */
disconnect_on_error = 0;
what = "connect";
- err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
+ err = sock->ops->connect(sock, (struct sockaddr_unsized *) &peer_in6, peer_addr_len, 0);
out:
if (err < 0) {
@@ -537,7 +537,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
what = "bind before listen";
- err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
+ err = s_listen->ops->bind(s_listen, (struct sockaddr_unsized *)&my_addr, my_addr_len);
if (err < 0)
goto out;
@@ -1736,13 +1736,13 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
page = peer_req->pages;
page_chain_for_each(page) {
unsigned len = min_t(int, ds, PAGE_SIZE);
- data = kmap(page);
+ data = kmap_local_page(page);
err = drbd_recv_all_warn(peer_device->connection, data, len);
if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
drbd_err(device, "Fault injection: Corrupting data on receive\n");
data[0] = data[0] ^ (unsigned long)-1;
}
- kunmap(page);
+ kunmap_local(data);
if (err) {
drbd_free_peer_req(device, peer_req);
return NULL;
@@ -1777,7 +1777,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
page = drbd_alloc_pages(peer_device, 1, 1);
- data = kmap(page);
+ data = kmap_local_page(page);
while (data_size) {
unsigned int len = min_t(int, data_size, PAGE_SIZE);
@@ -1786,7 +1786,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
break;
data_size -= len;
}
- kunmap(page);
+ kunmap_local(data);
drbd_free_pages(peer_device->device, page);
return err;
}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 5336c3c5ca36..c28786e0fe1c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -329,7 +329,7 @@ static bool initialized;
* This default is used whenever the current disk size is unknown.
* [Now it is rather a minimum]
*/
-#define MAX_DISK_SIZE 4 /* 3984 */
+#define MAX_DISK_SIZE (PAGE_SIZE / 1024)
/*
* globals used by 'result()'
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 13ce229d450c..ebe751f39742 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1908,6 +1908,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
goto failed;
}
+ /* We can block in this context, so ignore REQ_NOWAIT. */
+ if (rq->cmd_flags & REQ_NOWAIT)
+ rq->cmd_flags &= ~REQ_NOWAIT;
+
if (cmd_blkcg_css)
kthread_associate_blkcg(cmd_blkcg_css);
if (cmd_memcg_css)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1188f32a5e5e..f6c33b21f69e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -565,24 +565,27 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
msg.msg_iter = *iter;
noreclaim_flag = memalloc_noreclaim_save();
- do {
- sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
- sock->sk->sk_use_task_frag = false;
- msg.msg_flags = msg_flags | MSG_NOSIGNAL;
-
- if (send)
- result = sock_sendmsg(sock, &msg);
- else
- result = sock_recvmsg(sock, &msg, msg.msg_flags);
-
- if (result <= 0) {
- if (result == 0)
- result = -EPIPE; /* short read */
- break;
- }
- if (sent)
- *sent += result;
- } while (msg_data_left(&msg));
+
+ scoped_with_kernel_creds() {
+ do {
+ sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
+ sock->sk->sk_use_task_frag = false;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+ if (send)
+ result = sock_sendmsg(sock, &msg);
+ else
+ result = sock_recvmsg(sock, &msg, msg.msg_flags);
+
+ if (result <= 0) {
+ if (result == 0)
+ result = -EPIPE; /* short read */
+ break;
+ }
+ if (sent)
+ *sent += result;
+ } while (msg_data_left(&msg));
+ }
memalloc_noreclaim_restore(noreclaim_flag);
@@ -1018,9 +1021,9 @@ static void recv_work(struct work_struct *work)
nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock);
- nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
+ nbd_config_put(nbd);
kfree(args);
}
@@ -2235,12 +2238,13 @@ again:
ret = nbd_start_device(nbd);
out:
- mutex_unlock(&nbd->config_lock);
if (!ret) {
set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
refcount_inc(&nbd->config_refs);
nbd_connect_reply(info, nbd->index);
}
+ mutex_unlock(&nbd->config_lock);
+
nbd_config_put(nbd);
if (put_dev)
nbd_put(nbd);
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index f982027e8c85..c7c0fb79a6bf 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1129,26 +1129,28 @@ again:
return 0;
}
-static int copy_to_nullb(struct nullb *nullb, struct page *source,
- unsigned int off, sector_t sector, size_t n, bool is_fua)
+static blk_status_t copy_to_nullb(struct nullb *nullb, void *source,
+ loff_t pos, size_t n, bool is_fua)
{
size_t temp, count = 0;
- unsigned int offset;
struct nullb_page *t_page;
+ sector_t sector;
while (count < n) {
- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+ temp = min3(nullb->dev->blocksize, n - count,
+ PAGE_SIZE - offset_in_page(pos));
+ sector = pos >> SECTOR_SHIFT;
if (null_cache_active(nullb) && !is_fua)
null_make_cache_space(nullb, PAGE_SIZE);
- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
t_page = null_insert_page(nullb, sector,
!null_cache_active(nullb) || is_fua);
if (!t_page)
- return -ENOSPC;
+ return BLK_STS_NOSPC;
- memcpy_page(t_page->page, offset, source, off + count, temp);
+ memcpy_to_page(t_page->page, offset_in_page(pos),
+ source + count, temp);
__set_bit(sector & SECTOR_MASK, t_page->bitmap);
@@ -1156,41 +1158,34 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
null_free_sector(nullb, sector, true);
count += temp;
- sector += temp >> SECTOR_SHIFT;
+ pos += temp;
}
- return 0;
+ return BLK_STS_OK;
}
-static int copy_from_nullb(struct nullb *nullb, struct page *dest,
- unsigned int off, sector_t sector, size_t n)
+static void copy_from_nullb(struct nullb *nullb, void *dest, loff_t pos,
+ size_t n)
{
size_t temp, count = 0;
- unsigned int offset;
struct nullb_page *t_page;
+ sector_t sector;
while (count < n) {
- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+ temp = min3(nullb->dev->blocksize, n - count,
+ PAGE_SIZE - offset_in_page(pos));
+ sector = pos >> SECTOR_SHIFT;
- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
t_page = null_lookup_page(nullb, sector, false,
!null_cache_active(nullb));
-
if (t_page)
- memcpy_page(dest, off + count, t_page->page, offset,
- temp);
+ memcpy_from_page(dest + count, t_page->page,
+ offset_in_page(pos), temp);
else
- memzero_page(dest, off + count, temp);
+ memset(dest + count, 0, temp);
count += temp;
- sector += temp >> SECTOR_SHIFT;
+ pos += temp;
}
- return 0;
-}
-
-static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
- unsigned int len, unsigned int off)
-{
- memset_page(page, off, 0xff, len);
}
blk_status_t null_handle_discard(struct nullb_device *dev,
@@ -1234,34 +1229,39 @@ static blk_status_t null_handle_flush(struct nullb *nullb)
return errno_to_blk_status(err);
}
-static int null_transfer(struct nullb *nullb, struct page *page,
- unsigned int len, unsigned int off, bool is_write, sector_t sector,
+static blk_status_t null_transfer(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off, bool is_write, loff_t pos,
bool is_fua)
{
struct nullb_device *dev = nullb->dev;
+ blk_status_t err = BLK_STS_OK;
unsigned int valid_len = len;
- int err = 0;
+ void *p;
+ p = kmap_local_page(page) + off;
if (!is_write) {
- if (dev->zoned)
+ if (dev->zoned) {
valid_len = null_zone_valid_read_len(nullb,
- sector, len);
+ pos >> SECTOR_SHIFT, len);
+ if (valid_len && valid_len != len)
+ valid_len -= pos & (SECTOR_SIZE - 1);
+ }
if (valid_len) {
- err = copy_from_nullb(nullb, page, off,
- sector, valid_len);
+ copy_from_nullb(nullb, p, pos, valid_len);
off += valid_len;
len -= valid_len;
}
if (len)
- nullb_fill_pattern(nullb, page, len, off);
+ memset(p + valid_len, 0xff, len);
flush_dcache_page(page);
} else {
flush_dcache_page(page);
- err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+ err = copy_to_nullb(nullb, p, pos, len, is_fua);
}
+ kunmap_local(p);
return err;
}
@@ -1274,9 +1274,9 @@ static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
unsigned int len;
- sector_t sector = blk_rq_pos(rq);
+ loff_t pos = blk_rq_pos(rq) << SECTOR_SHIFT;
unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
unsigned int transferred_bytes = 0;
struct req_iterator iter;
@@ -1288,18 +1288,18 @@ static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
if (transferred_bytes + len > max_bytes)
len = max_bytes - transferred_bytes;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
- op_is_write(req_op(rq)), sector,
+ op_is_write(req_op(rq)), pos,
rq->cmd_flags & REQ_FUA);
if (err)
break;
- sector += len >> SECTOR_SHIFT;
+ pos += len;
transferred_bytes += len;
if (transferred_bytes >= max_bytes)
break;
}
spin_unlock_irq(&nullb->lock);
- return errno_to_blk_status(err);
+ return err;
}
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
@@ -1949,6 +1949,7 @@ static int null_add_dev(struct nullb_device *dev)
.logical_block_size = dev->blocksize,
.physical_block_size = dev->blocksize,
.max_hw_sectors = dev->max_sectors,
+ .dma_alignment = 1,
};
struct nullb *nullb;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 7bb6128dbaaf..6c4c4bbe7dad 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -143,7 +143,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
int null_register_zoned_dev(struct nullb *nullb);
void null_free_zoned_dev(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data);
+ unsigned int nr_zones,
+ struct blk_report_zones_args *args);
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, sector_t nr_sectors);
size_t null_zone_valid_read_len(struct nullb *nullb,
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 4e5728f45989..0ada35dc0989 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -191,7 +191,7 @@ void null_free_zoned_dev(struct nullb_device *dev)
}
int null_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data)
+ unsigned int nr_zones, struct blk_report_zones_args *args)
{
struct nullb *nullb = disk->private_data;
struct nullb_device *dev = nullb->dev;
@@ -225,7 +225,7 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
blkz.capacity = zone->capacity;
null_unlock_zone(dev, zone);
- error = cb(&blkz, i, data);
+ error = disk_report_zone(disk, &blkz, i, args);
if (error)
return error;
}
@@ -242,7 +242,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
{
struct nullb_device *dev = nullb->dev;
struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)];
- unsigned int nr_sectors = len >> SECTOR_SHIFT;
+ unsigned int nr_sectors = DIV_ROUND_UP(len, SECTOR_SIZE);
/* Read must be below the write pointer position */
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index dc9e4a14b885..8892f218a814 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -85,10 +85,14 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
+ dev_dbg(&dev->sbd.core, "%s:%u: %u sectors from %llu\n",
+ __func__, __LINE__, bio_sectors(iter.bio),
+ iter.bio->bi_iter.bi_sector);
if (gather)
memcpy_from_bvec(dev->bounce_buf + offset, &bvec);
else
memcpy_to_bvec(&bvec, dev->bounce_buf + offset);
+ offset += bvec.bv_len;
}
}
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index f35be51d213c..77360c2a6069 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -24,7 +24,7 @@
#define RTRS_PORT 1234
/**
- * enum rnbd_msg_types - RNBD message types
+ * enum rnbd_msg_type - RNBD message types
* @RNBD_MSG_SESS_INFO: initial session info from client to server
* @RNBD_MSG_SESS_INFO_RSP: initial session info from server to client
* @RNBD_MSG_OPEN: open (map) device request
@@ -47,10 +47,11 @@ enum rnbd_msg_type {
*/
struct rnbd_msg_hdr {
__le16 type;
+ /* private: */
__le16 __padding;
};
-/**
+/*
* We allow to map RO many times and RW only once. We allow to map yet another
* time RW, if MIGRATION is provided (second RW export can be required for
* example for VM migration)
@@ -78,6 +79,7 @@ static const __maybe_unused struct {
struct rnbd_msg_sess_info {
struct rnbd_msg_hdr hdr;
u8 ver;
+ /* private: */
u8 reserved[31];
};
@@ -89,6 +91,7 @@ struct rnbd_msg_sess_info {
struct rnbd_msg_sess_info_rsp {
struct rnbd_msg_hdr hdr;
u8 ver;
+ /* private: */
u8 reserved[31];
};
@@ -97,13 +100,16 @@ struct rnbd_msg_sess_info_rsp {
* @hdr: message header
* @access_mode: the mode to open remote device, valid values see:
* enum rnbd_access_mode
- * @device_name: device path on remote side
+ * @dev_name: device path on remote side
*/
struct rnbd_msg_open {
struct rnbd_msg_hdr hdr;
u8 access_mode;
+ /* private: */
u8 resv1;
+ /* public: */
s8 dev_name[NAME_MAX];
+ /* private: */
u8 reserved[3];
};
@@ -155,6 +161,7 @@ struct rnbd_msg_open_rsp {
__le16 secure_discard;
u8 obsolete_rotational;
u8 cache_policy;
+ /* private: */
u8 reserved[10];
};
@@ -187,7 +194,7 @@ struct rnbd_msg_io {
* @RNBD_OP_DISCARD: discard sectors
* @RNBD_OP_SECURE_ERASE: securely erase sectors
* @RNBD_OP_WRITE_ZEROES: write zeroes sectors
-
+ *
* @RNBD_F_SYNC: request is sync (sync write or read)
* @RNBD_F_FUA: forced unit access
*/
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 8498e9bae6fd..6713a6d92391 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -1,12 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
use super::{NullBlkDevice, THIS_MODULE};
-use core::fmt::{Display, Write};
use kernel::{
block::mq::gen_disk::{GenDisk, GenDiskBuilder},
c_str,
configfs::{self, AttributeOperations},
- configfs_attrs, new_mutex,
+ configfs_attrs,
+ fmt::{self, Write as _},
+ new_mutex,
page::PAGE_SIZE,
prelude::*,
str::{kstrtobool_bytes, CString},
@@ -99,8 +100,8 @@ impl TryFrom<u8> for IRQMode {
}
}
-impl Display for IRQMode {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Display for IRQMode {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::None => f.write_str("0")?,
Self::Soft => f.write_str("1")?,
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 1ec694d7f1a6..a9d5e575a2c4 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -17,8 +17,7 @@ use kernel::{
error::Result,
pr_info,
prelude::*,
- sync::Arc,
- types::ARef,
+ sync::{aref::ARef, Arc},
};
use pin_init::PinInit;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 0c74a41a6753..2c715df63f23 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -155,12 +155,13 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_REFCOUNT_INIT (REFCOUNT_MAX / 2)
+union ublk_io_buf {
+ __u64 addr;
+ struct ublk_auto_buf_reg auto_reg;
+};
+
struct ublk_io {
- /* userspace buffer address from io cmd */
- union {
- __u64 addr;
- struct ublk_auto_buf_reg buf;
- };
+ union ublk_io_buf buf;
unsigned int flags;
int res;
@@ -203,15 +204,12 @@ struct ublk_queue {
bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
spinlock_t cancel_lock;
struct ublk_device *dev;
- struct ublk_io ios[];
+ struct ublk_io ios[] __counted_by(q_depth);
};
struct ublk_device {
struct gendisk *ub_disk;
- char *__queues;
-
- unsigned int queue_size;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -239,6 +237,8 @@ struct ublk_device {
bool canceling;
pid_t ublksrv_tgid;
struct delayed_work exit_work;
+
+ struct ublk_queue *queues[];
};
/* header of ublk_params */
@@ -265,7 +265,7 @@ static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
return ub->dev_info.flags & UBLK_F_ZONED;
}
-static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq)
+static inline bool ublk_queue_is_zoned(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_ZONED;
}
@@ -368,7 +368,7 @@ static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
}
static int ublk_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data)
+ unsigned int nr_zones, struct blk_report_zones_args *args)
{
struct ublk_device *ub = disk->private_data;
unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
@@ -431,7 +431,7 @@ free_req:
if (!zone->len)
break;
- ret = cb(zone, i, data);
+ ret = disk_report_zone(disk, zone, i, args);
if (ret)
goto out;
@@ -499,7 +499,7 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
iod->op_flags = ublk_op | ublk_req_build_flags(req);
iod->nr_sectors = blk_rq_sectors(req);
iod->start_sector = blk_rq_pos(req);
- iod->addr = io->addr;
+ iod->addr = io->buf.addr;
return BLK_STS_OK;
}
@@ -781,7 +781,7 @@ static noinline void ublk_put_device(struct ublk_device *ub)
static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
int qid)
{
- return (struct ublk_queue *)&(dev->__queues[qid * dev->queue_size]);
+ return dev->queues[qid];
}
static inline bool ublk_rq_has_data(const struct request *rq)
@@ -914,73 +914,6 @@ static const struct block_device_operations ub_fops = {
.report_zones = ublk_report_zones,
};
-#define UBLK_MAX_PIN_PAGES 32
-
-struct ublk_io_iter {
- struct page *pages[UBLK_MAX_PIN_PAGES];
- struct bio *bio;
- struct bvec_iter iter;
-};
-
-/* return how many pages are copied */
-static void ublk_copy_io_pages(struct ublk_io_iter *data,
- size_t total, size_t pg_off, int dir)
-{
- unsigned done = 0;
- unsigned pg_idx = 0;
-
- while (done < total) {
- struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
- (unsigned)(PAGE_SIZE - pg_off));
- void *bv_buf = bvec_kmap_local(&bv);
- void *pg_buf = kmap_local_page(data->pages[pg_idx]);
-
- if (dir == ITER_DEST)
- memcpy(pg_buf + pg_off, bv_buf, bytes);
- else
- memcpy(bv_buf, pg_buf + pg_off, bytes);
-
- kunmap_local(pg_buf);
- kunmap_local(bv_buf);
-
- /* advance page array */
- pg_off += bytes;
- if (pg_off == PAGE_SIZE) {
- pg_idx += 1;
- pg_off = 0;
- }
-
- done += bytes;
-
- /* advance bio */
- bio_advance_iter_single(data->bio, &data->iter, bytes);
- if (!data->iter.bi_size) {
- data->bio = data->bio->bi_next;
- if (data->bio == NULL)
- break;
- data->iter = data->bio->bi_iter;
- }
- }
-}
-
-static bool ublk_advance_io_iter(const struct request *req,
- struct ublk_io_iter *iter, unsigned int offset)
-{
- struct bio *bio = req->bio;
-
- for_each_bio(bio) {
- if (bio->bi_iter.bi_size > offset) {
- iter->bio = bio;
- iter->iter = bio->bi_iter;
- bio_advance_iter(iter->bio, &iter->iter, offset);
- return true;
- }
- offset -= bio->bi_iter.bi_size;
- }
- return false;
-}
-
/*
* Copy data between request pages and io_iter, and 'offset'
* is the start point of linear offset of request.
@@ -988,34 +921,35 @@ static bool ublk_advance_io_iter(const struct request *req,
static size_t ublk_copy_user_pages(const struct request *req,
unsigned offset, struct iov_iter *uiter, int dir)
{
- struct ublk_io_iter iter;
+ struct req_iterator iter;
+ struct bio_vec bv;
size_t done = 0;
- if (!ublk_advance_io_iter(req, &iter, offset))
- return 0;
+ rq_for_each_segment(bv, req, iter) {
+ void *bv_buf;
+ size_t copied;
- while (iov_iter_count(uiter) && iter.bio) {
- unsigned nr_pages;
- ssize_t len;
- size_t off;
- int i;
-
- len = iov_iter_get_pages2(uiter, iter.pages,
- iov_iter_count(uiter),
- UBLK_MAX_PIN_PAGES, &off);
- if (len <= 0)
- return done;
-
- ublk_copy_io_pages(&iter, len, off, dir);
- nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
- for (i = 0; i < nr_pages; i++) {
- if (dir == ITER_DEST)
- set_page_dirty(iter.pages[i]);
- put_page(iter.pages[i]);
+ if (offset >= bv.bv_len) {
+ offset -= bv.bv_len;
+ continue;
}
- done += len;
- }
+ bv.bv_offset += offset;
+ bv.bv_len -= offset;
+ bv_buf = bvec_kmap_local(&bv);
+ if (dir == ITER_DEST)
+ copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
+ else
+ copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
+
+ kunmap_local(bv_buf);
+
+ done += copied;
+ if (copied < bv.bv_len)
+ break;
+
+ offset = 0;
+ }
return done;
}
@@ -1030,8 +964,9 @@ static inline bool ublk_need_unmap_req(const struct request *req)
(req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN);
}
-static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
- const struct ublk_io *io)
+static unsigned int ublk_map_io(const struct ublk_queue *ubq,
+ const struct request *req,
+ const struct ublk_io *io)
{
const unsigned int rq_bytes = blk_rq_bytes(req);
@@ -1047,13 +982,13 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
struct iov_iter iter;
const int dir = ITER_DEST;
- import_ubuf(dir, u64_to_user_ptr(io->addr), rq_bytes, &iter);
+ import_ubuf(dir, u64_to_user_ptr(io->buf.addr), rq_bytes, &iter);
return ublk_copy_user_pages(req, 0, &iter, dir);
}
return rq_bytes;
}
-static int ublk_unmap_io(bool need_map,
+static unsigned int ublk_unmap_io(bool need_map,
const struct request *req,
const struct ublk_io *io)
{
@@ -1068,7 +1003,7 @@ static int ublk_unmap_io(bool need_map,
WARN_ON_ONCE(io->res > rq_bytes);
- import_ubuf(dir, u64_to_user_ptr(io->addr), io->res, &iter);
+ import_ubuf(dir, u64_to_user_ptr(io->buf.addr), io->res, &iter);
return ublk_copy_user_pages(req, 0, &iter, dir);
}
return rq_bytes;
@@ -1134,7 +1069,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
iod->op_flags = ublk_op | ublk_req_build_flags(req);
iod->nr_sectors = blk_rq_sectors(req);
iod->start_sector = blk_rq_pos(req);
- iod->addr = io->addr;
+ iod->addr = io->buf.addr;
return BLK_STS_OK;
}
@@ -1233,45 +1168,65 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
}
static void
-ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, struct ublk_io *io)
+ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, unsigned tag)
{
- unsigned tag = io - ubq->ios;
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, tag);
iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
}
-static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
- struct ublk_io *io, unsigned int issue_flags)
+enum auto_buf_reg_res {
+ AUTO_BUF_REG_FAIL,
+ AUTO_BUF_REG_FALLBACK,
+ AUTO_BUF_REG_OK,
+};
+
+static void ublk_prep_auto_buf_reg_io(const struct ublk_queue *ubq,
+ struct request *req, struct ublk_io *io,
+ struct io_uring_cmd *cmd,
+ enum auto_buf_reg_res res)
+{
+ if (res == AUTO_BUF_REG_OK) {
+ io->task_registered_buffers = 1;
+ io->buf_ctx_handle = io_uring_cmd_ctx_handle(cmd);
+ io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
+ }
+ ublk_init_req_ref(ubq, io);
+ __ublk_prep_compl_io_cmd(io, req);
+}
+
+static enum auto_buf_reg_res
+__ublk_do_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
+ struct ublk_io *io, struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
int ret;
- ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
- io->buf.index, issue_flags);
+ ret = io_buffer_register_bvec(cmd, req, ublk_io_release,
+ io->buf.auto_reg.index, issue_flags);
if (ret) {
- if (io->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) {
- ublk_auto_buf_reg_fallback(ubq, io);
- return true;
+ if (io->buf.auto_reg.flags & UBLK_AUTO_BUF_REG_FALLBACK) {
+ ublk_auto_buf_reg_fallback(ubq, req->tag);
+ return AUTO_BUF_REG_FALLBACK;
}
blk_mq_end_request(req, BLK_STS_IOERR);
- return false;
+ return AUTO_BUF_REG_FAIL;
}
- io->task_registered_buffers = 1;
- io->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd);
- io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
- return true;
+ return AUTO_BUF_REG_OK;
}
-static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
- struct request *req, struct ublk_io *io,
- unsigned int issue_flags)
+static void ublk_do_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
+ struct ublk_io *io, struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
- ublk_init_req_ref(ubq, io);
- if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
- return ublk_auto_buf_reg(ubq, req, io, issue_flags);
+ enum auto_buf_reg_res res = __ublk_do_auto_buf_reg(ubq, req, io, cmd,
+ issue_flags);
- return true;
+ if (res != AUTO_BUF_REG_FAIL) {
+ ublk_prep_auto_buf_reg_io(ubq, req, io, cmd, res);
+ io_uring_cmd_done(cmd, UBLK_IO_RES_OK, issue_flags);
+ }
}
static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
@@ -1302,10 +1257,9 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
return true;
}
-static void ublk_dispatch_req(struct ublk_queue *ubq,
- struct request *req,
- unsigned int issue_flags)
+static void ublk_dispatch_req(struct ublk_queue *ubq, struct request *req)
{
+ unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
int tag = req->tag;
struct ublk_io *io = &ubq->ios[tag];
@@ -1344,17 +1298,21 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
if (!ublk_start_io(ubq, req, io))
return;
- if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
+ if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) {
+ ublk_do_auto_buf_reg(ubq, req, io, io->cmd, issue_flags);
+ } else {
+ ublk_init_req_ref(ubq, io);
ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
+ }
}
-static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
- unsigned int issue_flags)
+static void ublk_cmd_tw_cb(struct io_tw_req tw_req, io_tw_token_t tw)
{
+ struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
- ublk_dispatch_req(ubq, pdu->req, issue_flags);
+ ublk_dispatch_req(ubq, pdu->req);
}
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
@@ -1366,9 +1324,9 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
io_uring_cmd_complete_in_task(cmd, ublk_cmd_tw_cb);
}
-static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
- unsigned int issue_flags)
+static void ublk_cmd_list_tw_cb(struct io_tw_req tw_req, io_tw_token_t tw)
{
+ struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct request *rq = pdu->req_list;
struct request *next;
@@ -1376,7 +1334,7 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
do {
next = rq->rq_next;
rq->rq_next = NULL;
- ublk_dispatch_req(rq->mq_hctx->driver_data, rq, issue_flags);
+ ublk_dispatch_req(rq->mq_hctx->driver_data, rq);
rq = next;
} while (rq);
}
@@ -1537,7 +1495,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
*/
io->flags &= UBLK_IO_FLAG_CANCELED;
io->cmd = NULL;
- io->addr = 0;
+ io->buf.addr = 0;
/*
* old task is PF_EXITING, put it now
@@ -2098,13 +2056,16 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
static inline int ublk_set_auto_buf_reg(struct ublk_io *io, struct io_uring_cmd *cmd)
{
- io->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+ struct ublk_auto_buf_reg buf;
- if (io->buf.reserved0 || io->buf.reserved1)
+ buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+
+ if (buf.reserved0 || buf.reserved1)
return -EINVAL;
- if (io->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK)
+ if (buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK)
return -EINVAL;
+ io->buf.auto_reg = buf;
return 0;
}
@@ -2126,7 +2087,7 @@ static int ublk_handle_auto_buf_reg(struct ublk_io *io,
* this ublk request gets stuck.
*/
if (io->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd))
- *buf_idx = io->buf.index;
+ *buf_idx = io->buf.auto_reg.index;
}
return ublk_set_auto_buf_reg(io, cmd);
@@ -2154,7 +2115,7 @@ ublk_config_io_buf(const struct ublk_device *ub, struct ublk_io *io,
if (ublk_dev_support_auto_buf_reg(ub))
return ublk_handle_auto_buf_reg(io, cmd, buf_idx);
- io->addr = buf_addr;
+ io->buf.addr = buf_addr;
return 0;
}
@@ -2272,39 +2233,41 @@ static int ublk_check_fetch_buf(const struct ublk_device *ub, __u64 buf_addr)
return 0;
}
-static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
- struct ublk_io *io, __u64 buf_addr)
+static int __ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
+ struct ublk_io *io)
{
- int ret = 0;
-
- /*
- * When handling FETCH command for setting up ublk uring queue,
- * ub->mutex is the innermost lock, and we won't block for handling
- * FETCH, so it is fine even for IO_URING_F_NONBLOCK.
- */
- mutex_lock(&ub->mutex);
/* UBLK_IO_FETCH_REQ is only allowed before dev is setup */
- if (ublk_dev_ready(ub)) {
- ret = -EBUSY;
- goto out;
- }
+ if (ublk_dev_ready(ub))
+ return -EBUSY;
/* allow each command to be FETCHed at most once */
- if (io->flags & UBLK_IO_FLAG_ACTIVE) {
- ret = -EINVAL;
- goto out;
- }
+ if (io->flags & UBLK_IO_FLAG_ACTIVE)
+ return -EINVAL;
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV);
ublk_fill_io_cmd(io, cmd);
- ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL);
- if (ret)
- goto out;
WRITE_ONCE(io->task, get_task_struct(current));
ublk_mark_io_ready(ub);
-out:
+
+ return 0;
+}
+
+static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
+ struct ublk_io *io, __u64 buf_addr)
+{
+ int ret;
+
+ /*
+ * When handling FETCH command for setting up ublk uring queue,
+ * ub->mutex is the innermost lock, and we won't block for handling
+ * FETCH, so it is fine even for IO_URING_F_NONBLOCK.
+ */
+ mutex_lock(&ub->mutex);
+ ret = __ublk_fetch(cmd, ub, io);
+ if (!ret)
+ ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL);
mutex_unlock(&ub->mutex);
return ret;
}
@@ -2351,7 +2314,7 @@ static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io,
*/
io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
/* update iod->addr because ublksrv may have passed a new io buffer */
- ublk_get_iod(ubq, req->tag)->addr = io->addr;
+ ublk_get_iod(ubq, req->tag)->addr = io->buf.addr;
pr_devel("%s: update iod->addr: qid %d tag %d io_flags %x addr %llx\n",
__func__, ubq->q_id, req->tag, io->flags,
ublk_get_iod(ubq, req->tag)->addr);
@@ -2367,7 +2330,7 @@ static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
u16 buf_idx = UBLK_INVALID_BUF_IDX;
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
- struct ublk_io *io;
+ struct ublk_io *io = NULL;
u32 cmd_op = cmd->cmd_op;
u16 q_id = READ_ONCE(ub_src->q_id);
u16 tag = READ_ONCE(ub_src->tag);
@@ -2488,7 +2451,7 @@ static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
out:
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
- __func__, cmd_op, tag, ret, io->flags);
+ __func__, cmd_op, tag, ret, io ? io->flags : 0);
return ret;
}
@@ -2523,9 +2486,10 @@ fail_put:
return NULL;
}
-static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
- unsigned int issue_flags)
+static void ublk_ch_uring_cmd_cb(struct io_tw_req tw_req, io_tw_token_t tw)
{
+ unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
+ struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
if (ret != -EIOCBQUEUED)
@@ -2575,9 +2539,6 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb,
size_t buf_off;
u16 tag, q_id;
- if (!ub)
- return ERR_PTR(-EACCES);
-
if (!user_backed_iter(iter))
return ERR_PTR(-EACCES);
@@ -2603,9 +2564,6 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb,
if (!req)
return ERR_PTR(-EINVAL);
- if (!req->mq_hctx || !req->mq_hctx->driver_data)
- goto fail;
-
if (!ublk_check_ubuf_dir(req, dir))
goto fail;
@@ -2662,9 +2620,13 @@ static const struct file_operations ublk_ch_fops = {
static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{
- int size = ublk_queue_cmd_buf_size(ub);
- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
- int i;
+ struct ublk_queue *ubq = ub->queues[q_id];
+ int size, i;
+
+ if (!ubq)
+ return;
+
+ size = ublk_queue_cmd_buf_size(ub);
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -2676,57 +2638,76 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
if (ubq->io_cmd_buf)
free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
+
+ kvfree(ubq);
+ ub->queues[q_id] = NULL;
+}
+
+static int ublk_get_queue_numa_node(struct ublk_device *ub, int q_id)
+{
+ unsigned int cpu;
+
+ /* Find first CPU mapped to this queue */
+ for_each_possible_cpu(cpu) {
+ if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[cpu] == q_id)
+ return cpu_to_node(cpu);
+ }
+
+ return NUMA_NO_NODE;
}
static int ublk_init_queue(struct ublk_device *ub, int q_id)
{
- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ int depth = ub->dev_info.queue_depth;
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
- void *ptr;
+ struct ublk_queue *ubq;
+ struct page *page;
+ int numa_node;
int size;
+ /* Determine NUMA node based on queue's CPU affinity */
+ numa_node = ublk_get_queue_numa_node(ub, q_id);
+
+ /* Allocate queue structure on local NUMA node */
+ ubq = kvzalloc_node(struct_size(ubq, ios, depth), GFP_KERNEL,
+ numa_node);
+ if (!ubq)
+ return -ENOMEM;
+
spin_lock_init(&ubq->cancel_lock);
ubq->flags = ub->dev_info.flags;
ubq->q_id = q_id;
- ubq->q_depth = ub->dev_info.queue_depth;
+ ubq->q_depth = depth;
size = ublk_queue_cmd_buf_size(ub);
- ptr = (void *) __get_free_pages(gfp_flags, get_order(size));
- if (!ptr)
+ /* Allocate I/O command buffer on local NUMA node */
+ page = alloc_pages_node(numa_node, gfp_flags, get_order(size));
+ if (!page) {
+ kvfree(ubq);
return -ENOMEM;
+ }
+ ubq->io_cmd_buf = page_address(page);
- ubq->io_cmd_buf = ptr;
+ ub->queues[q_id] = ubq;
ubq->dev = ub;
return 0;
}
static void ublk_deinit_queues(struct ublk_device *ub)
{
- int nr_queues = ub->dev_info.nr_hw_queues;
int i;
- if (!ub->__queues)
- return;
-
- for (i = 0; i < nr_queues; i++)
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_deinit_queue(ub, i);
- kvfree(ub->__queues);
}
static int ublk_init_queues(struct ublk_device *ub)
{
- int nr_queues = ub->dev_info.nr_hw_queues;
- int depth = ub->dev_info.queue_depth;
- int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
- int i, ret = -ENOMEM;
+ int i, ret;
- ub->queue_size = ubq_size;
- ub->__queues = kvcalloc(nr_queues, ubq_size, GFP_KERNEL);
- if (!ub->__queues)
- return ret;
-
- for (i = 0; i < nr_queues; i++) {
- if (ublk_init_queue(ub, i))
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ ret = ublk_init_queue(ub, i);
+ if (ret)
goto fail;
}
@@ -3128,7 +3109,7 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
goto out_unlock;
ret = -ENOMEM;
- ub = kzalloc(sizeof(*ub), GFP_KERNEL);
+ ub = kzalloc(struct_size(ub, queues, info.nr_hw_queues), GFP_KERNEL);
if (!ub)
goto out_unlock;
mutex_init(&ub->mutex);
@@ -3178,17 +3159,17 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
ub->dev_info.nr_hw_queues, nr_cpu_ids);
ublk_align_max_io_size(ub);
- ret = ublk_init_queues(ub);
+ ret = ublk_add_tag_set(ub);
if (ret)
goto out_free_dev_number;
- ret = ublk_add_tag_set(ub);
+ ret = ublk_init_queues(ub);
if (ret)
- goto out_deinit_queues;
+ goto out_free_tag_set;
ret = -EFAULT;
if (copy_to_user(argp, &ub->dev_info, sizeof(info)))
- goto out_free_tag_set;
+ goto out_deinit_queues;
/*
* Add the char dev so that ublksrv daemon can be setup.
@@ -3197,10 +3178,10 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
ret = ublk_add_chdev(ub);
goto out_unlock;
-out_free_tag_set:
- blk_mq_free_tag_set(&ub->tag_set);
out_deinit_queues:
ublk_deinit_queues(ub);
+out_free_tag_set:
+ blk_mq_free_tag_set(&ub->tag_set);
out_free_dev_number:
ublk_free_dev_number(ub);
out_free_ub:
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f061420dfb10..357434bdae99 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -584,7 +584,8 @@ out:
static int virtblk_parse_zone(struct virtio_blk *vblk,
struct virtio_blk_zone_descriptor *entry,
- unsigned int idx, report_zones_cb cb, void *data)
+ unsigned int idx,
+ struct blk_report_zones_args *args)
{
struct blk_zone zone = { };
@@ -650,12 +651,12 @@ static int virtblk_parse_zone(struct virtio_blk *vblk,
* The callback below checks the validity of the reported
* entry data, no need to further validate it here.
*/
- return cb(&zone, idx, data);
+ return disk_report_zone(vblk->disk, &zone, idx, args);
}
static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb,
- void *data)
+ unsigned int nr_zones,
+ struct blk_report_zones_args *args)
{
struct virtio_blk *vblk = disk->private_data;
struct virtio_blk_zone_report *report;
@@ -693,7 +694,7 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = virtblk_parse_zone(vblk, &report->zones[i],
- zone_idx, cb, data);
+ zone_idx, args);
if (ret)
goto fail_report;
@@ -1026,8 +1027,13 @@ static int init_vq(struct virtio_blk *vblk)
out:
kfree(vqs);
kfree(vqs_info);
- if (err)
+ if (err) {
kfree(vblk->vqs);
+ /*
+ * Set to NULL to prevent freeing vqs again during freezing.
+ */
+ vblk->vqs = NULL;
+ }
return err;
}
@@ -1598,6 +1604,12 @@ static int virtblk_freeze_priv(struct virtio_device *vdev)
vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
+ /*
+ * Set to NULL to prevent freeing vqs again after a failed vqs
+ * allocation during resume. Note that kfree() already handles NULL
+ * pointers safely.
+ */
+ vblk->vqs = NULL;
return 0;
}
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index a423228e201b..3f50321aa4a7 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -32,6 +32,8 @@ enum {
ZLOOP_OPT_NR_QUEUES = (1 << 6),
ZLOOP_OPT_QUEUE_DEPTH = (1 << 7),
ZLOOP_OPT_BUFFERED_IO = (1 << 8),
+ ZLOOP_OPT_ZONE_APPEND = (1 << 9),
+ ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10),
};
static const match_table_t zloop_opt_tokens = {
@@ -44,6 +46,8 @@ static const match_table_t zloop_opt_tokens = {
{ ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
{ ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
{ ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
+ { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
+ { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
{ ZLOOP_OPT_ERR, NULL }
};
@@ -56,6 +60,8 @@ static const match_table_t zloop_opt_tokens = {
#define ZLOOP_DEF_NR_QUEUES 1
#define ZLOOP_DEF_QUEUE_DEPTH 128
#define ZLOOP_DEF_BUFFERED_IO false
+#define ZLOOP_DEF_ZONE_APPEND true
+#define ZLOOP_DEF_ORDERED_ZONE_APPEND false
/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB 16384
@@ -71,6 +77,8 @@ struct zloop_options {
unsigned int nr_queues;
unsigned int queue_depth;
bool buffered_io;
+ bool zone_append;
+ bool ordered_zone_append;
};
/*
@@ -92,6 +100,7 @@ struct zloop_zone {
unsigned long flags;
struct mutex lock;
+ spinlock_t wp_lock;
enum blk_zone_cond cond;
sector_t start;
sector_t wp;
@@ -108,6 +117,8 @@ struct zloop_device {
struct workqueue_struct *workqueue;
bool buffered_io;
+ bool zone_append;
+ bool ordered_zone_append;
const char *base_dir;
struct file *data_dir;
@@ -147,6 +158,7 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
struct zloop_zone *zone = &zlo->zones[zone_no];
struct kstat stat;
sector_t file_sectors;
+ unsigned long flags;
int ret;
lockdep_assert_held(&zone->lock);
@@ -172,16 +184,18 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
return -EINVAL;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
if (!file_sectors) {
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
} else if (file_sectors == zlo->zone_capacity) {
zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = zone->start + zlo->zone_size;
+ zone->wp = ULLONG_MAX;
} else {
zone->cond = BLK_ZONE_COND_CLOSED;
zone->wp = zone->start + file_sectors;
}
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
return 0;
}
@@ -225,6 +239,7 @@ unlock:
static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -243,10 +258,12 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
+ spin_lock_irqsave(&zone->wp_lock, flags);
if (zone->wp == zone->start)
zone->cond = BLK_ZONE_COND_EMPTY;
else
zone->cond = BLK_ZONE_COND_CLOSED;
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
break;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_FULL:
@@ -264,6 +281,7 @@ unlock:
static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -281,9 +299,11 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
unlock:
mutex_unlock(&zone->lock);
@@ -308,6 +328,7 @@ static int zloop_reset_all_zones(struct zloop_device *zlo)
static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
+ unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -325,9 +346,11 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
+ spin_lock_irqsave(&zone->wp_lock, flags);
zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = zone->start + zlo->zone_size;
+ zone->wp = ULLONG_MAX;
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
unlock:
mutex_unlock(&zone->lock);
@@ -369,6 +392,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
struct zloop_zone *zone;
struct iov_iter iter;
struct bio_vec tmp;
+ unsigned long flags;
sector_t zone_end;
int nr_bvec = 0;
int ret;
@@ -378,6 +402,11 @@ static void zloop_rw(struct zloop_cmd *cmd)
cmd->nr_sectors = nr_sectors;
cmd->ret = 0;
+ if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
+ ret = -EIO;
+ goto out;
+ }
+
/* We should never get an I/O beyond the device capacity. */
if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
ret = -EIO;
@@ -406,16 +435,31 @@ static void zloop_rw(struct zloop_cmd *cmd)
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
mutex_lock(&zone->lock);
- if (is_append) {
- sector = zone->wp;
- cmd->sector = sector;
- }
+ spin_lock_irqsave(&zone->wp_lock, flags);
/*
- * Write operations must be aligned to the write pointer and
- * fully contained within the zone capacity.
+ * Zone append operations always go at the current write
+ * pointer, but regular write operations must already be
+ * aligned to the write pointer when submitted.
*/
- if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
+ if (is_append) {
+ /*
+ * If ordered zone append is in use, we already checked
+ * and set the target sector in zloop_queue_rq().
+ */
+ if (!zlo->ordered_zone_append) {
+ if (zone->cond == BLK_ZONE_COND_FULL ||
+ zone->wp + nr_sectors > zone_end) {
+ spin_unlock_irqrestore(&zone->wp_lock,
+ flags);
+ ret = -EIO;
+ goto unlock;
+ }
+ sector = zone->wp;
+ }
+ cmd->sector = sector;
+ } else if (sector != zone->wp) {
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
zone_no, sector, zone->wp);
ret = -EIO;
@@ -428,13 +472,19 @@ static void zloop_rw(struct zloop_cmd *cmd)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
/*
- * Advance the write pointer of sequential zones. If the write
- * fails, the wp position will be corrected when the next I/O
- * copmpletes.
+ * Advance the write pointer, unless ordered zone append is in
+ * use. If the write fails, the write pointer position will be
+ * corrected when the next I/O starts execution.
*/
- zone->wp += nr_sectors;
- if (zone->wp == zone_end)
- zone->cond = BLK_ZONE_COND_FULL;
+ if (!is_append || !zlo->ordered_zone_append) {
+ zone->wp += nr_sectors;
+ if (zone->wp == zone_end) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = ULLONG_MAX;
+ }
+ }
+
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
}
rq_for_each_bvec(tmp, rq, rq_iter)
@@ -498,6 +548,10 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd)
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct zloop_device *zlo = rq->q->queuedata;
+ /* We can block in this context, so ignore REQ_NOWAIT. */
+ if (rq->cmd_flags & REQ_NOWAIT)
+ rq->cmd_flags &= ~REQ_NOWAIT;
+
switch (req_op(rq)) {
case REQ_OP_READ:
case REQ_OP_WRITE:
@@ -608,6 +662,35 @@ static void zloop_complete_rq(struct request *rq)
blk_mq_end_request(rq, sts);
}
+static bool zloop_set_zone_append_sector(struct request *rq)
+{
+ struct zloop_device *zlo = rq->q->queuedata;
+ unsigned int zone_no = rq_zone_no(rq);
+ struct zloop_zone *zone = &zlo->zones[zone_no];
+ sector_t zone_end = zone->start + zlo->zone_capacity;
+ sector_t nr_sectors = blk_rq_sectors(rq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->wp_lock, flags);
+
+ if (zone->cond == BLK_ZONE_COND_FULL ||
+ zone->wp + nr_sectors > zone_end) {
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
+ return false;
+ }
+
+ rq->__sector = zone->wp;
+ zone->wp += blk_rq_sectors(rq);
+ if (zone->wp >= zone_end) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = ULLONG_MAX;
+ }
+
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
+
+ return true;
+}
+
static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -618,6 +701,16 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (zlo->state == Zlo_deleting)
return BLK_STS_IOERR;
+ /*
+ * If we need to strongly order zone append operations, set the request
+ * sector to the zone write pointer location now instead of when the
+ * command work runs.
+ */
+ if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
+ if (!zloop_set_zone_append_sector(rq))
+ return BLK_STS_IOERR;
+ }
+
blk_mq_start_request(rq);
INIT_WORK(&cmd->work, zloop_cmd_workfn);
@@ -647,11 +740,12 @@ static int zloop_open(struct gendisk *disk, blk_mode_t mode)
}
static int zloop_report_zones(struct gendisk *disk, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data)
+ unsigned int nr_zones, struct blk_report_zones_args *args)
{
struct zloop_device *zlo = disk->private_data;
struct blk_zone blkz = {};
unsigned int first, i;
+ unsigned long flags;
int ret;
first = disk_zone_no(disk, sector);
@@ -675,7 +769,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
blkz.start = zone->start;
blkz.len = zlo->zone_size;
+ spin_lock_irqsave(&zone->wp_lock, flags);
blkz.wp = zone->wp;
+ spin_unlock_irqrestore(&zone->wp_lock, flags);
blkz.cond = zone->cond;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
@@ -687,7 +783,7 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
mutex_unlock(&zone->lock);
- ret = cb(&blkz, i, data);
+ ret = disk_report_zone(disk, &blkz, i, args);
if (ret)
return ret;
}
@@ -783,6 +879,7 @@ static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
int ret;
mutex_init(&zone->lock);
+ spin_lock_init(&zone->wp_lock);
zone->start = (sector_t)zone_no << zlo->zone_shift;
if (!restore)
@@ -884,7 +981,6 @@ static int zloop_ctl_add(struct zloop_options *opts)
{
struct queue_limits lim = {
.max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
- .max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT,
.chunk_sectors = opts->zone_size,
.features = BLK_FEAT_ZONED,
};
@@ -936,6 +1032,9 @@ static int zloop_ctl_add(struct zloop_options *opts)
zlo->nr_zones = nr_zones;
zlo->nr_conv_zones = opts->nr_conv_zones;
zlo->buffered_io = opts->buffered_io;
+ zlo->zone_append = opts->zone_append;
+ if (zlo->zone_append)
+ zlo->ordered_zone_append = opts->ordered_zone_append;
zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
opts->nr_queues * opts->queue_depth, zlo->id);
@@ -976,6 +1075,8 @@ static int zloop_ctl_add(struct zloop_options *opts)
lim.physical_block_size = zlo->block_size;
lim.logical_block_size = zlo->block_size;
+ if (zlo->zone_append)
+ lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
zlo->tag_set.ops = &zloop_mq_ops;
zlo->tag_set.nr_hw_queues = opts->nr_queues;
@@ -1016,10 +1117,14 @@ static int zloop_ctl_add(struct zloop_options *opts)
zlo->state = Zlo_live;
mutex_unlock(&zloop_ctl_mutex);
- pr_info("Added device %d: %u zones of %llu MB, %u B block size\n",
+ pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
zlo->id, zlo->nr_zones,
((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
zlo->block_size);
+ pr_info("zloop%d: using %s%s zone append\n",
+ zlo->id,
+ zlo->ordered_zone_append ? "ordered " : "",
+ zlo->zone_append ? "native" : "emulated");
return 0;
@@ -1106,6 +1211,8 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
+ opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
+ opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;
if (!buf)
return 0;
@@ -1215,6 +1322,21 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
case ZLOOP_OPT_BUFFERED_IO:
opts->buffered_io = true;
break;
+ case ZLOOP_OPT_ZONE_APPEND:
+ if (match_uint(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (token != 0 && token != 1) {
+ pr_err("Invalid zone_append value\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->zone_append = token;
+ break;
+ case ZLOOP_OPT_ORDERED_ZONE_APPEND:
+ opts->ordered_zone_append = true;
+ break;
case ZLOOP_OPT_ERR:
default:
pr_warn("unknown parameter or missing value '%s'\n", p);