Diffstat (limited to 'include')
-rw-r--r--  include/linux/backing-dev-defs.h   |  4
-rw-r--r--  include/linux/bio-integrity.h      |  7
-rw-r--r--  include/linux/bio.h                |  2
-rw-r--r--  include/linux/blk-integrity.h      | 19
-rw-r--r--  include/linux/blk-mq-dma.h         | 28
-rw-r--r--  include/linux/blk-mq.h             | 30
-rw-r--r--  include/linux/blk_types.h          | 14
-rw-r--r--  include/linux/blkdev.h             | 62
-rw-r--r--  include/linux/blktrace_api.h       |  3
-rw-r--r--  include/linux/device-mapper.h      | 10
-rw-r--r--  include/linux/kfifo.h              | 34
-rw-r--r--  include/linux/sbitmap.h            |  6
-rw-r--r--  include/uapi/linux/blktrace_api.h  | 55
-rw-r--r--  include/uapi/linux/blkzoned.h      | 46
-rw-r--r--  include/uapi/linux/fs.h            |  3
-rw-r--r--  include/uapi/linux/raid/md_p.h     |  3
16 files changed, 244 insertions, 82 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 610ef62b6a32..0217c1073735 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -170,7 +170,9 @@ struct backing_dev_info {
u64 id;
struct rb_node rb_node; /* keyed by ->id */
struct list_head bdi_list;
- unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
+ /* max readahead in PAGE_SIZE units */
+ unsigned long __data_racy ra_pages;
+
unsigned long io_pages; /* max allowed IO size */
struct kref refcnt; /* Reference counter for the structure */
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 851254f36eb3..21e4652dcfd2 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -13,7 +13,8 @@ enum bip_flags {
BIP_CHECK_GUARD = 1 << 5, /* guard check */
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
- BIP_P2P_DMA = 1 << 8, /* using P2P address */
+
+ BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
};
struct bio_integrity_payload {
@@ -140,4 +141,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
+void bio_integrity_free_buf(struct bio_integrity_payload *bip);
+
#endif /* _LINUX_BIO_INTEGRITY_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 16c1c85613b7..ad2d57908c1c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -324,6 +324,8 @@ extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, unsigned max_bytes, unsigned len_align);
+u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
+ u8 gaps_bit);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index b659373788f6..a6b84206eb94 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -8,6 +8,11 @@
struct request;
+/*
+ * Maximum contiguous integrity buffer allocation.
+ */
+#define BLK_INTEGRITY_MAX_SIZE SZ_2M
+
enum blk_integrity_flags {
BLK_INTEGRITY_NOVERIFY = 1 << 0,
BLK_INTEGRITY_NOGENERATE = 1 << 1,
@@ -28,14 +33,6 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
#ifdef CONFIG_BLK_DEV_INTEGRITY
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
- struct device *dma_dev, struct dma_iova_state *state,
- size_t mapped_len)
-{
- return blk_dma_unmap(req, dma_dev, state, mapped_len,
- bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
-}
-
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
@@ -124,12 +121,6 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
{
return 0;
}
-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
- struct device *dma_dev, struct dma_iova_state *state,
- size_t mapped_len)
-{
- return false;
-}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
ssize_t bytes)
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index 51829958d872..cb88fc791fbd 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -16,13 +16,13 @@ struct blk_dma_iter {
/* Output address range for this iteration */
dma_addr_t addr;
u32 len;
+ struct pci_p2pdma_map_state p2pdma;
/* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
blk_status_t status;
/* Internal to blk_rq_dma_map_iter_* */
struct blk_map_iter iter;
- struct pci_p2pdma_map_state p2pdma;
};
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
@@ -43,36 +43,34 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
}
/**
- * blk_dma_unmap - try to DMA unmap a request
+ * blk_rq_dma_unmap - try to DMA unmap a request
* @req: request to unmap
* @dma_dev: device to unmap from
* @state: DMA IOVA state
* @mapped_len: number of bytes to unmap
- * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
+ * @map: peer-to-peer mapping type
*
* Returns %false if the callers need to manually unmap every DMA segment
* mapped using @iter or %true if no work is left to be done.
*/
-static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len,
+ enum pci_p2pdma_map_type map)
{
- if (is_p2p)
+ if (map == PCI_P2PDMA_MAP_BUS_ADDR)
return true;
if (dma_use_iova(state)) {
+ unsigned int attrs = 0;
+
+ if (map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+ attrs |= DMA_ATTR_MMIO;
+
dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req),
- 0);
+ attrs);
return true;
}
return !dma_need_unmap(dma_dev);
}
-
-static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len)
-{
- return blk_dma_unmap(req, dma_dev, state, mapped_len,
- req->cmd_flags & REQ_P2PDMA);
-}
-
#endif /* BLK_MQ_DMA_H */
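
Since blk_rq_dma_unmap() now takes the saved enum pci_p2pdma_map_type rather than a bool, a driver's completion path would look roughly like the sketch below. The foo_* names and the iod bookkeeping structure are hypothetical, not part of this diff:

/*
 * Sketch only: the P2P mapping type recorded at map time (e.g. from
 * blk_dma_iter.p2pdma.map) decides the teardown. BUS_ADDR mappings need
 * no unmap; THRU_HOST_BRIDGE IOVA mappings are destroyed with
 * DMA_ATTR_MMIO.
 */
static void foo_unmap_rq(struct foo_dev *fdev, struct request *req,
			 struct foo_iod *iod)
{
	if (blk_rq_dma_unmap(req, fdev->dev, &iod->dma_state,
			     iod->mapped_len, iod->p2p_map_type))
		return;

	/* false: each segment mapped via the iterator must be unmapped */
	foo_unmap_each_segment(fdev, iod);
}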
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b25d12545f46..eb7254b3dddd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -152,6 +152,14 @@ struct request {
unsigned short nr_phys_segments;
unsigned short nr_integrity_segments;
+ /*
+ * The lowest set bit for address gaps between physical segments. This
+ * provides information necessary for DMA optimization opportunities,
+ * such as testing whether the segments can be coalesced against the
+ * device's IOMMU granule.
+ */
+ unsigned char phys_gap_bit;
+
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *crypt_ctx;
struct blk_crypto_keyslot *crypt_keyslot;
@@ -208,6 +216,14 @@ struct request {
void *end_io_data;
};
+/*
+ * Returns a mask with all bits at and above the lowest recorded gap bit set
+ * to 1. phys_gap_bit is 1-based (ffs()-style); 0 means no gaps and yields an
+ * empty mask.
+ */
+static inline unsigned long req_phys_gap_mask(const struct request *req)
+{
+ return ~(((1 << req->phys_gap_bit) >> 1) - 1);
+}
+
static inline enum req_op req_op(const struct request *req)
{
return req->cmd_flags & REQ_OP_MASK;
@@ -999,8 +1015,20 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
return rq + 1;
}
+static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ rcu_read_lock();
+ hctx = rcu_dereference(q->queue_hw_ctx)[id];
+ rcu_read_unlock();
+
+ return hctx;
+}
+
#define queue_for_each_hw_ctx(q, hctx, i) \
- xa_for_each(&(q)->hctx_table, (i), (hctx))
+ for ((i) = 0; (i) < (q)->nr_hw_queues && \
+ ({ hctx = queue_hctx((q), i); 1; }); (i)++)
#define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \
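
A worked example of the req_phys_gap_mask() arithmetic may help. The helper below is hypothetical, and the 1-based, ffs()-style encoding with 0 meaning "no gaps" is inferred from the (1 << bit) >> 1 term above:

/*
 * Hypothetical helper, not part of this diff. With phys_gap_bit == 13
 * (lowest gap bit is bit 12):
 *	((1 << 13) >> 1) - 1 == 0x0fff, so the mask is ~0x0fff.
 * A 4k IOMMU granule mask (0xfff) does not intersect it, so the
 * segments can be coalesced; with phys_gap_bit == 0 (no gaps) the mask
 * is 0 and never intersects anything.
 */
static inline bool foo_can_coalesce(const struct request *req,
				    unsigned long iommu_granule)
{
	return !(req_phys_gap_mask(req) & (iommu_granule - 1));
}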
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 44c30183ecc3..cbbcb9051ec3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -218,6 +218,18 @@ struct bio {
enum rw_hint bi_write_hint;
u8 bi_write_stream;
blk_status_t bi_status;
+
+ /*
+ * The bvec gap bit indicates the lowest set bit in any address offset
+ * between all bi_io_vecs. This field is initialized only after the bio
+ * is split to the hardware limits (see bio_split_io_at()). The value
+ * may be used to consider DMA optimizations when performing the DMA
+ * mapping. The value is compared to a power of two mask where the
+ * result depends on any bit set within the mask, so saving the lowest
+ * bit is sufficient to know if any segment gap collides with the mask.
+ */
+ u8 bi_bvec_gap_bit;
+
atomic_t __bi_remaining;
struct bvec_iter bi_iter;
@@ -381,7 +393,6 @@ enum req_flag_bits {
__REQ_DRV, /* for driver use */
__REQ_FS_PRIVATE, /* for file system (submitter) use */
__REQ_ATOMIC, /* for atomic write operations */
- __REQ_P2PDMA, /* contains P2P DMA pages */
/*
* Command specific flags, keep last:
*/
@@ -414,7 +425,6 @@ enum req_flag_bits {
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
-#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA)
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
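
The bi_bvec_gap_bit comment above describes the same trick at the bio level. A sketch of the boundary test it enables (hypothetical helper, assuming the same 1-based encoding used by req_phys_gap_mask() in blk-mq.h):

/* true if any inter-bvec address offset has a bit inside the mask */
static inline bool foo_bvec_gap_within(const struct bio *bio,
				       unsigned long boundary_mask)
{
	return bio->bi_bvec_gap_bit &&
	       (1UL << (bio->bi_bvec_gap_bit - 1)) <= boundary_mask;
}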
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 70b671a9a7f7..72e34acd439c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct blk_flush_queue;
struct kiocb;
struct pr_ops;
struct rq_qos;
+struct blk_report_zones_args;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;
@@ -172,6 +173,7 @@ struct gendisk {
#define GD_ADDED 4
#define GD_SUPPRESS_PART_SCAN 5
#define GD_OWNS_QUEUE 6
+#define GD_ZONE_APPEND_USED 7
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
@@ -195,7 +197,7 @@ struct gendisk {
unsigned int nr_zones;
unsigned int zone_capacity;
unsigned int last_zone_capacity;
- unsigned long __rcu *conv_zones_bitmap;
+ u8 __rcu *zones_cond;
unsigned int zone_wplugs_hash_bits;
atomic_t nr_zone_wplugs;
spinlock_t zone_wplugs_lock;
@@ -378,7 +380,7 @@ struct queue_limits {
unsigned int max_sectors;
unsigned int max_user_sectors;
unsigned int max_segment_size;
- unsigned int min_segment_size;
+ unsigned int max_fast_segment_size;
unsigned int physical_block_size;
unsigned int logical_block_size;
unsigned int alignment_offset;
@@ -432,9 +434,17 @@ struct queue_limits {
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
+int disk_report_zone(struct gendisk *disk, struct blk_zone *zone,
+ unsigned int idx, struct blk_report_zones_args *args);
+
+int blkdev_get_zone_info(struct block_device *bdev, sector_t sector,
+ struct blk_zone *zone);
+
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk);
@@ -485,7 +495,7 @@ struct request_queue {
*/
unsigned long queue_flags;
- unsigned int rq_timeout;
+ unsigned int __data_racy rq_timeout;
unsigned int queue_depth;
@@ -493,7 +503,7 @@ struct request_queue {
/* hw dispatch queues */
unsigned int nr_hw_queues;
- struct xarray hctx_table;
+ struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
struct percpu_ref q_usage_counter;
struct lock_class_key io_lock_cls_key;
@@ -921,12 +931,20 @@ static inline unsigned int bdev_zone_capacity(struct block_device *bdev,
{
return disk_zone_capacity(bdev->bd_disk, pos);
}
+
+bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector);
+
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return 0;
}
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+ return false;
+}
+
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
{
return false;
@@ -1504,6 +1522,12 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
return q->limits.chunk_sectors;
}
+static inline sector_t bdev_zone_start(struct block_device *bdev,
+ sector_t sector)
+{
+ return sector & ~(bdev_zone_sectors(bdev) - 1);
+}
+
static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
sector_t sector)
{
@@ -1529,33 +1553,6 @@ static inline bool bdev_is_zone_aligned(struct block_device *bdev,
return bdev_is_zone_start(bdev, sector);
}
-/**
- * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
- * @bdev: block device to check
- * @sector: sector number
- *
- * Check if @sector on @bdev is contained in a sequential write required zone.
- */
-static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
-{
- bool is_seq = false;
-
-#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
- if (bdev_is_zoned(bdev)) {
- struct gendisk *disk = bdev->bd_disk;
- unsigned long *bitmap;
-
- rcu_read_lock();
- bitmap = rcu_dereference(disk->conv_zones_bitmap);
- is_seq = !bitmap ||
- !test_bit(disk_zone_no(disk, sector), bitmap);
- rcu_read_unlock();
- }
-#endif
-
- return is_seq;
-}
-
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
@@ -1662,7 +1659,8 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data);
+ unsigned int nr_zones,
+ struct blk_report_zones_args *args);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
/* returns the length of the identifier or a negative errno: */
int (*get_unique_id)(struct gendisk *disk, u8 id[16],
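
With the prototype change above, a driver's ->report_zones no longer invokes a report_zones_cb itself but forwards each descriptor through disk_report_zone(). A rough sketch, with all foo_* names hypothetical:

static int foo_report_zones(struct gendisk *disk, sector_t sector,
			    unsigned int nr_zones,
			    struct blk_report_zones_args *args)
{
	struct foo_dev *fdev = disk->private_data;
	struct blk_zone zone;
	unsigned int i;
	int ret;

	for (i = 0; i < nr_zones && sector < get_capacity(disk); i++) {
		ret = foo_read_zone(fdev, sector, &zone);	/* fill zone */
		if (ret)
			return ret;
		/* hand the descriptor to the block layer */
		ret = disk_report_zone(disk, &zone, i, args);
		if (ret)
			return ret;
		sector += fdev->zone_sectors;
	}
	return i;
}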
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 122c62e561fc..05c8754456aa 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,11 +14,12 @@
#include <linux/sysfs.h>
struct blk_trace {
+ int version;
int trace_state;
struct rchan *rchan;
unsigned long __percpu *sequence;
unsigned char __percpu *msg_data;
- u16 act_mask;
+ u64 act_mask;
u64 start_lba;
u64 end_lba;
u32 pid;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 84fdc3a6a19a..38f625af6ab4 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -538,12 +538,18 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
#ifdef CONFIG_BLK_DEV_ZONED
struct dm_report_zones_args {
struct dm_target *tgt;
+ struct gendisk *disk;
sector_t next_sector;
- void *orig_data;
- report_zones_cb orig_cb;
unsigned int zone_idx;
+ /* for block layer ->report_zones */
+ struct blk_report_zones_args *rep_args;
+
+ /* for internal users */
+ report_zones_cb cb;
+ void *data;
+
/* must be filled by ->report_zones before calling dm_report_zones_cb */
sector_t start;
};
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index fd743d4c4b4b..8b81ac74829c 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -370,6 +370,30 @@ __kfifo_int_must_check_helper( \
)
/**
+ * kfifo_alloc_node - dynamically allocates a new fifo buffer on a NUMA node
+ * @fifo: pointer to the fifo
+ * @size: the number of elements in the fifo, this must be a power of 2
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @node: NUMA node to allocate memory on
+ *
+ * This macro dynamically allocates a new fifo buffer with NUMA node awareness.
+ *
+ * The number of elements will be rounded up to a power of 2.
+ * The fifo must be released with kfifo_free().
+ * Return 0 if no error, otherwise an error code.
+ */
+#define kfifo_alloc_node(fifo, size, gfp_mask, node) \
+__kfifo_int_must_check_helper( \
+({ \
+ typeof((fifo) + 1) __tmp = (fifo); \
+ struct __kfifo *__kfifo = &__tmp->kfifo; \
+ __is_kfifo_ptr(__tmp) ? \
+ __kfifo_alloc_node(__kfifo, size, sizeof(*__tmp->type), gfp_mask, node) : \
+ -EINVAL; \
+}) \
+)
+
+/**
* kfifo_free - frees the fifo
* @fifo: the fifo to be freed
*/
@@ -899,8 +923,14 @@ __kfifo_uint_must_check_helper( \
)
-extern int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
- size_t esize, gfp_t gfp_mask);
+extern int __kfifo_alloc_node(struct __kfifo *fifo, unsigned int size,
+ size_t esize, gfp_t gfp_mask, int node);
+
+static inline int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
+ size_t esize, gfp_t gfp_mask)
+{
+ return __kfifo_alloc_node(fifo, size, esize, gfp_mask, NUMA_NO_NODE);
+}
extern void __kfifo_free(struct __kfifo *fifo);
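
Usage of the new macro mirrors kfifo_alloc(); a minimal sketch, assuming a hypothetical foo_event element type:

struct foo_ctx {
	DECLARE_KFIFO_PTR(events, struct foo_event);
};

static int foo_init(struct foo_ctx *ctx, struct device *dev)
{
	/* 256 is rounded up to a power of two; freed with kfifo_free() */
	return kfifo_alloc_node(&ctx->events, 256, GFP_KERNEL,
				dev_to_node(dev));
}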
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index ffb9907c7070..cc7ad189caa5 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -75,7 +75,7 @@ struct sbitmap {
*/
struct sbitmap_word *map;
- /*
+ /**
* @alloc_hint: Cache of last successfully allocated or freed bit.
*
* This is per-cpu, which allows multiple users to stick to different
@@ -128,7 +128,7 @@ struct sbitmap_queue {
*/
struct sbq_wait_state *ws;
- /*
+ /**
* @ws_active: count of currently active ws waitqueues
*/
atomic_t ws_active;
@@ -547,6 +547,8 @@ static inline void sbq_index_atomic_inc(atomic_t *index)
* sbitmap_queue.
* @sbq: Bitmap queue to wait on.
* @wait_index: A counter per "user" of @sbq.
+ *
+ * Return: Next wait queue to be used
*/
static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
atomic_t *wait_index)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 1bfb635e309b..7c092d9f3aa4 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -26,11 +26,22 @@ enum blktrace_cat {
BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */
BLK_TC_FUA = 1 << 15, /* fua requests */
- BLK_TC_END = 1 << 15, /* we've run out of bits! */
+ BLK_TC_END_V1 = 1 << 15, /* we've run out of bits! */
+
+ BLK_TC_ZONE_APPEND = 1ull << 16, /* zone append */
+ BLK_TC_ZONE_RESET = 1ull << 17, /* zone reset */
+ BLK_TC_ZONE_RESET_ALL = 1ull << 18, /* zone reset all */
+ BLK_TC_ZONE_FINISH = 1ull << 19, /* zone finish */
+ BLK_TC_ZONE_OPEN = 1ull << 20, /* zone open */
+ BLK_TC_ZONE_CLOSE = 1ull << 21, /* zone close */
+
+ BLK_TC_WRITE_ZEROES = 1ull << 22, /* write-zeroes */
+
+ BLK_TC_END_V2 = 1ull << 22,
};
#define BLK_TC_SHIFT (16)
-#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
+#define BLK_TC_ACT(act) ((u64)(act) << BLK_TC_SHIFT)
/*
* Basic trace actions
@@ -53,6 +64,8 @@ enum blktrace_act {
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
+ __BLK_TA_ZONE_PLUG, /* zone write plug was plugged */
+ __BLK_TA_ZONE_UNPLUG, /* zone write plug was unplugged */
__BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/
};
@@ -88,12 +101,19 @@ enum blktrace_notify {
#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
+#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\
+ BLK_TC_ACT(BLK_TC_ZONE_APPEND))
+#define BLK_TA_ZONE_PLUG (__BLK_TA_ZONE_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_ZONE_UNPLUG (__BLK_TA_ZONE_UNPLUG |\
+ BLK_TC_ACT(BLK_TC_QUEUE))
+
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_IO_TRACE_MAGIC 0x65617400
#define BLK_IO_TRACE_VERSION 0x07
+#define BLK_IO_TRACE2_VERSION 0x08
/*
* The trace itself
@@ -113,6 +133,21 @@ struct blk_io_trace {
/* cgroup id will be stored here if exists */
};
+struct blk_io_trace2 {
+ __u32 magic; /* MAGIC << 8 | BLK_IO_TRACE2_VERSION */
+ __u32 sequence; /* event number */
+ __u64 time; /* in nanoseconds */
+ __u64 sector; /* disk offset */
+ __u32 bytes; /* transfer length */
+ __u32 pid; /* who did it */
+ __u64 action; /* what happened */
+ __u32 device; /* device number */
+ __u32 cpu; /* on what cpu did it happen */
+ __u16 error; /* completion error */
+ __u16 pdu_len; /* length of data after this trace */
+ __u8 pad[12];
+ /* cgroup id will be stored here if it exists */
+};
/*
* The remap event
*/
@@ -129,6 +164,7 @@ enum {
};
#define BLKTRACE_BDEV_SIZE 32
+#define BLKTRACE_BDEV_SIZE2 64
/*
* User setup structure passed with BLKTRACESETUP
@@ -143,4 +179,19 @@ struct blk_user_trace_setup {
__u32 pid;
};
+/*
+ * User setup structure passed with BLKTRACESETUP2
+ */
+struct blk_user_trace_setup2 {
+ char name[BLKTRACE_BDEV_SIZE2]; /* output */
+ __u64 act_mask; /* input */
+ __u32 buf_size; /* input */
+ __u32 buf_nr; /* input */
+ __u64 start_lba;
+ __u64 end_lba;
+ __u32 pid;
+ __u32 flags; /* currently unused */
+ __u64 reserved[11];
+};
+
#endif /* _UAPIBLKTRACE_H */
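
From userspace, the v2 setup structure is what makes the above-16-bit categories reachable. A sketch against the patched headers (error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKTRACESETUP2, added by this series */
#include <linux/blktrace_api.h>

static int start_zone_trace(int bdev_fd)
{
	struct blk_user_trace_setup2 buts;

	memset(&buts, 0, sizeof(buts));
	/* the 64-bit act_mask can now include the zone categories */
	buts.act_mask = BLK_TC_QUEUE | BLK_TC_COMPLETE | BLK_TC_ZONE_APPEND;
	buts.buf_size = 512 * 1024;
	buts.buf_nr = 4;

	return ioctl(bdev_fd, BLKTRACESETUP2, &buts);
}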
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index f85743ef6e7d..e33f02703350 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -48,6 +48,8 @@ enum blk_zone_type {
* FINISH ZONE command.
* @BLK_ZONE_COND_READONLY: The zone is read-only.
* @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
+ * @BLK_ZONE_COND_ACTIVE: The zone is either implicitly open, explicitly open,
+ * or closed.
*
* The Zone Condition state machine in the ZBC/ZAC standards maps the above
 * definitions as:
@@ -61,6 +63,13 @@ enum blk_zone_type {
*
* Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
* be considered invalid.
+ *
+ * The condition BLK_ZONE_COND_ACTIVE is used only with cached zone reports.
+ * It is used to report any of the BLK_ZONE_COND_IMP_OPEN,
+ * BLK_ZONE_COND_EXP_OPEN and BLK_ZONE_COND_CLOSED conditions. Conversely, a
+ * regular zone report will never report a zone condition using
+ * BLK_ZONE_COND_ACTIVE and instead use the conditions BLK_ZONE_COND_IMP_OPEN,
+ * BLK_ZONE_COND_EXP_OPEN or BLK_ZONE_COND_CLOSED as reported by the device.
*/
enum blk_zone_cond {
BLK_ZONE_COND_NOT_WP = 0x0,
@@ -71,15 +80,27 @@ enum blk_zone_cond {
BLK_ZONE_COND_READONLY = 0xD,
BLK_ZONE_COND_FULL = 0xE,
BLK_ZONE_COND_OFFLINE = 0xF,
+
+ BLK_ZONE_COND_ACTIVE = 0xFF,
};
/**
* enum blk_zone_report_flags - Feature flags of reported zone descriptors.
*
- * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field.
+ * @BLK_ZONE_REP_CAPACITY: Output only. Indicates that zone descriptors in a
+ * zone report have a valid capacity field.
+ * @BLK_ZONE_REP_CACHED: Input only. Indicates that the zone report should be
+ * generated using cached zone information. In this case,
+ * the implicit open, explicit open and closed zone
+ * conditions are all reported with the
+ * BLK_ZONE_COND_ACTIVE condition.
*/
enum blk_zone_report_flags {
- BLK_ZONE_REP_CAPACITY = (1 << 0),
+ /* Output flags */
+ BLK_ZONE_REP_CAPACITY = (1U << 0),
+
+ /* Input flags */
+ BLK_ZONE_REP_CACHED = (1U << 31),
};
/**
@@ -122,6 +143,10 @@ struct blk_zone {
* @sector: starting sector of report
* @nr_zones: IN maximum / OUT actual
- * @flags: one or more flags as defined by enum blk_zone_report_flags.
+ * @flags: one or more flags as defined by enum blk_zone_report_flags.
+ *	With BLKREPORTZONE, this field is ignored as an input and is valid
+ *	only as an output. Using BLKREPORTZONEV2, this field is used as both
+ *	input and output.
* @zones: Space to hold @nr_zones @zones entries on reply.
*
* The array of at most @nr_zones must follow this structure in memory.
@@ -148,9 +173,19 @@ struct blk_zone_range {
/**
* Zoned block device ioctl's:
*
- * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
- * The zone report will start from the zone containing the
- * sector specified in the report request structure.
+ * @BLKREPORTZONE: Get zone information from a zoned device. Takes a zone report
+ * as argument. The zone report will start from the zone
+ * containing the sector specified in struct blk_zone_report.
+ * The flags field of struct blk_zone_report is used as an
+ * output only and ignored as an input.
+ * DEPRECATED, use BLKREPORTZONEV2 instead.
+ * @BLKREPORTZONEV2: Same as @BLKREPORTZONE but uses the flags field of
+ * struct blk_zone_report as an input, allowing a zone
+ * report to be obtained from cached zone information when the
+ * flag BLK_ZONE_REP_CACHED is set. In that case, the zone report
+ * may include zones with the condition @BLK_ZONE_COND_ACTIVE
+ * (see the description of this condition above for more
+ * details).
* @BLKRESETZONE: Reset the write pointer of the zones in the specified
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
@@ -169,5 +204,6 @@ struct blk_zone_range {
#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
+#define BLKREPORTZONEV2 _IOWR(0x12, 142, struct blk_zone_report)
#endif /* _UAPI_BLKZONED_H */
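
A userspace sketch of the new ioctl (assumes the patched headers; note that BLK_ZONE_REP_CACHED is only honored as an input by BLKREPORTZONEV2):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

static int report_cached(int fd, __u64 sector, unsigned int nr_zones)
{
	struct blk_zone_report *rep;
	int ret;

	rep = calloc(1, sizeof(*rep) + nr_zones * sizeof(struct blk_zone));
	if (!rep)
		return -1;

	rep->sector = sector;
	rep->nr_zones = nr_zones;
	rep->flags = BLK_ZONE_REP_CACHED;	/* input flag, v2 only */

	ret = ioctl(fd, BLKREPORTZONEV2, rep);
	/* on success, active zones come back as BLK_ZONE_COND_ACTIVE */
	free(rep);
	return ret;
}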
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index beb4c2d1e41c..66ca526cf786 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -298,8 +298,9 @@ struct file_attr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
-/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
+/* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
+#define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index ac74133a4768..310068bb2a1d 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -291,7 +291,8 @@ struct mdp_superblock_1 {
__le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
__le32 sb_csum; /* checksum up to devs[max_dev] */
__le32 max_dev; /* size of devs[] array to consider */
- __u8 pad3[64-32]; /* set to 0 when writing */
+ __le32 logical_block_size; /* same as q->limits->logical_block_size */
+ __u8 pad3[64-36]; /* set to 0 when writing */
/* device state information. Indexed by dev_number.
* 2 bytes per device