diff options
| author | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-17 11:33:33 +0100 |
|---|---|---|
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-17 11:33:33 +0100 |
| commit | 1758047057dbe329be712a31b79db7151b5871f8 (patch) | |
| tree | 00203eb55328f2feda70b3d37c964287b364796f /drivers/nvme/host/core.c | |
| parent | bcae3af286f49bf4f6cda03f165fbe530f4a6bed (diff) | |
| parent | 1c405ca11bf563de1725e5ecfb4a74ee289d2ee9 (diff) | |
Merge drm/drm-next into drm-misc-next-fixes
Backmerging to bring drm-misc-next-fixes up to the latest state for
the current release cycle.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/nvme/host/core.c')
| -rw-r--r-- | drivers/nvme/host/core.c | 271 |
1 files changed, 197 insertions, 74 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7efb31b87f37..1af8a4513708 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -6,6 +6,7 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> +#include <linux/blk-integrity.h> #include <linux/compat.h> #include <linux/delay.h> #include <linux/errno.h> @@ -13,7 +14,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/backing-dev.h> -#include <linux/list_sort.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pr.h> @@ -119,25 +119,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, struct nvme_command *cmd); -/* - * Prepare a queue for teardown. - * - * This must forcibly unquiesce queues to avoid blocking dispatch, and only set - * the capacity to 0 after that to avoid blocking dispatchers that may be - * holding bd_butex. This will end buffered writers dirtying pages that can't - * be synced. - */ -static void nvme_set_queue_dying(struct nvme_ns *ns) -{ - if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - return; - - blk_set_queue_dying(ns->queue); - blk_mq_unquiesce_queue(ns->queue); - - set_capacity_and_notify(ns->disk, 0); -} - void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* @@ -222,7 +203,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { dev_info(ctrl->device, - "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn); + "Removing ctrl: NQN \"%s\"\n", nvmf_ctrl_subsysnqn(ctrl)); flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); @@ -346,15 +327,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return RETRY; } -static inline void nvme_end_req(struct request *req) +static inline void nvme_end_req_zoned(struct request *req) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); +} +static inline void nvme_end_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + + nvme_end_req_zoned(req); nvme_trace_bio_complete(req); blk_mq_end_request(req, status); } @@ -381,6 +366,13 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); +void nvme_complete_batch_req(struct request *req) +{ + nvme_cleanup_cmd(req); + nvme_end_req_zoned(req); +} +EXPORT_SYMBOL_GPL(nvme_complete_batch_req); + /* * Called to unwind from ->queue_rq on a failed command submission so that the * multipathing code gets called to potentially failover to another path. @@ -632,7 +624,7 @@ static inline void nvme_init_request(struct request *req, req->cmd_flags |= REQ_FAILFAST_DRIVER; if (req->mq_hctx->type == HCTX_TYPE_POLL) - req->cmd_flags |= REQ_HIPRI; + req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } @@ -674,6 +666,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq) { if (ctrl->state != NVME_CTRL_DELETING_NOIO && + ctrl->state != NVME_CTRL_DELETING && ctrl->state != NVME_CTRL_DEAD && !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) @@ -823,6 +816,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } @@ -874,6 +868,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_IOERR; } + memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); @@ -890,6 +885,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) return nvme_setup_discard(ns, req, cmnd); @@ -899,10 +896,19 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); - if (nvme_ns_has_pi(ns)) + + if (nvme_ns_has_pi(ns)) { cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); - else - cmnd->write_zeroes.control = 0; + + switch (ns->pi_type) { + case NVME_NS_DPS_PI_TYPE1: + case NVME_NS_DPS_PI_TYPE2: + cmnd->write_zeroes.reftag = + cpu_to_le32(t10_pi_ref_tag(req)); + break; + } + } + return BLK_STS_OK; } @@ -923,9 +929,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; cmnd->rw.opcode = op; + cmnd->rw.flags = 0; cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); + cmnd->rw.rsvd2 = 0; + cmnd->rw.metadata = 0; cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cmnd->rw.reftag = 0; + cmnd->rw.apptag = 0; + cmnd->rw.appmask = 0; if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); @@ -979,12 +991,11 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) { struct nvme_command *cmd = nvme_req(req)->cmd; + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { + if (!(req->rq_flags & RQF_DONTPREP)) nvme_clear_nvme_request(req); - memset(cmd, 0, sizeof(*cmd)); - } switch (req_op(req)) { case REQ_OP_DRV_IN: @@ -1027,7 +1038,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) return BLK_STS_IOERR; } - nvme_req(req)->genctr++; + if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN)) + nvme_req(req)->genctr++; cmd->common.command_id = nvme_cid(req); trace_nvme_setup_cmd(req, cmd); return ret; @@ -1738,9 +1750,20 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return -EINVAL; - if (ctrl->max_integrity_segments) - ns->features |= - (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + + ns->features |= NVME_NS_EXT_LBAS; + + /* + * The current fabrics transport drivers support namespace + * metadata formats only if nvme_ns_has_pi() returns true. + * Suppress support for all other formats so the namespace will + * have a 0 capacity and not be usable through the block stack. + * + * Note, this check will need to be modified if any drivers + * gain the ability to use other metadata formats. + */ + if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) + ns->features |= NVME_NS_METADATA_SUPPORTED; } else { /* * For PCIe controllers, we can't easily remap the separate @@ -2467,6 +2490,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kioxia CD6-V Series / HPE PE8030 device times out and + * aborts I/O during any load, but more easily reproducible + * with discards (fstrim). + * + * The device is left in a state where it is also not possible + * to use "nvme set-feature" to disable APST, but booting with + * nvme_core.default_ps_max_latency=0 works. + */ + .vid = 0x1e0f, + .mn = "KCD6XVUL6T40", + .quirks = NVME_QUIRK_NO_APST, } }; @@ -2599,6 +2636,24 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev, } static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); +static ssize_t nvme_subsys_show_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + switch (subsys->subtype) { + case NVME_NQN_DISC: + return sysfs_emit(buf, "discovery\n"); + case NVME_NQN_NVME: + return sysfs_emit(buf, "nvm\n"); + default: + return sysfs_emit(buf, "reserved\n"); + } +} +static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type); + #define nvme_subsys_show_str_function(field) \ static ssize_t subsys_##field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2619,6 +2674,7 @@ static struct attribute *nvme_subsys_attrs[] = { &subsys_attr_serial.attr, &subsys_attr_firmware_rev.attr, &subsys_attr_subsysnqn.attr, + &subsys_attr_subsystype.attr, #ifdef CONFIG_NVME_MULTIPATH &subsys_attr_iopolicy.attr, #endif @@ -2652,8 +2708,9 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, if (tmp->cntlid == ctrl->cntlid) { dev_err(ctrl->device, - "Duplicate cntlid %u with %s, rejecting\n", - ctrl->cntlid, dev_name(tmp->device)); + "Duplicate cntlid %u with %s, subsys %s, rejecting\n", + ctrl->cntlid, dev_name(tmp->device), + subsys->subnqn); return false; } @@ -2689,6 +2746,21 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + + /* Versions prior to 1.4 don't necessarily report a valid type */ + if (id->cntrltype == NVME_CTRL_DISC || + !strcmp(subsys->subnqn, NVME_DISC_SUBSYS_NAME)) + subsys->subtype = NVME_NQN_DISC; + else + subsys->subtype = NVME_NQN_NVME; + + if (nvme_discovery_ctrl(ctrl) && subsys->subtype != NVME_NQN_DISC) { + dev_err(ctrl->device, + "Subsystem %s is not a discovery controller", + subsys->subnqn); + kfree(subsys); + return -EINVAL; + } subsys->awupf = le16_to_cpu(id->awupf); #ifdef CONFIG_NVME_MULTIPATH subsys->iopolicy = NVME_IOPOLICY_NUMA; @@ -3524,7 +3596,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && nvme_tryget_ns_head(h)) + if (h->ns_id != nsid) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; } @@ -3547,10 +3621,15 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, return 0; } +static void nvme_cdev_rel(struct device *dev) +{ + ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); +} + void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device) { cdev_device_del(cdev, cdev_device); - ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt)); + put_device(cdev_device); } int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, @@ -3563,14 +3642,14 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); cdev_device->class = nvme_ns_chr_class; + cdev_device->release = nvme_cdev_rel; device_initialize(cdev_device); cdev_init(cdev, fops); cdev->owner = owner; ret = cdev_device_add(cdev, cdev_device); - if (ret) { + if (ret) put_device(cdev_device); - ida_simple_remove(&nvme_ns_chr_minor_ida, minor); - } + return ret; } @@ -3602,11 +3681,9 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) ns->ctrl->instance, ns->head->instance); if (ret) return ret; - ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops, - ns->ctrl->ops->module); - if (ret) - kfree_const(ns->cdev_device.kobj.name); - return ret; + + return nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops, + ns->ctrl->ops->module); } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, @@ -3714,15 +3791,6 @@ out_unlock: return ret; } -static int ns_cmp(void *priv, const struct list_head *a, - const struct list_head *b) -{ - struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); - struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); - - return nsa->head->ns_id - nsb->head->ns_id; -} - struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; @@ -3743,6 +3811,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); +/* + * Add the namespace to the controller list while keeping the list ordered. + */ +static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) +{ + struct nvme_ns *tmp; + + list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) { + if (tmp->head->ns_id < ns->head->ns_id) { + list_add(&ns->list, &tmp->list); + return; + } + } + list_add(&ns->list, &ns->ctrl->namespaces); +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3793,9 +3877,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_unlink_ns; down_write(&ctrl->namespaces_rwsem); - list_add_tail(&ns->list, &ctrl->namespaces); + nvme_ns_add_to_ctrl_list(ns); up_write(&ctrl->namespaces_rwsem); - nvme_get_ctrl(ctrl); if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) @@ -3843,6 +3926,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ @@ -3856,20 +3943,11 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - /* Synchronize with nvme_init_ns_head() */ - mutex_lock(&ns->head->subsys->lock); - if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); - last_path = true; - } - mutex_unlock(&ns->head->subsys->lock); if (last_path) nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); @@ -4083,10 +4161,6 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_scan_ns_list(ctrl) != 0) nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - - down_write(&ctrl->namespaces_rwsem); - list_sort(NULL, &ctrl->namespaces, ns_cmp); - up_write(&ctrl->namespaces_rwsem); } /* @@ -4470,6 +4544,39 @@ out: } EXPORT_SYMBOL_GPL(nvme_init_ctrl); +static void nvme_start_ns_queue(struct nvme_ns *ns) +{ + if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags)) + blk_mq_unquiesce_queue(ns->queue); +} + +static void nvme_stop_ns_queue(struct nvme_ns *ns) +{ + if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags)) + blk_mq_quiesce_queue(ns->queue); + else + blk_mq_wait_quiesce_done(ns->queue); +} + +/* + * Prepare a queue for teardown. + * + * This must forcibly unquiesce queues to avoid blocking dispatch, and only set + * the capacity to 0 after that to avoid blocking dispatchers that may be + * holding bd_butex. This will end buffered writers dirtying pages that can't + * be synced. + */ +static void nvme_set_queue_dying(struct nvme_ns *ns) +{ + if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + return; + + blk_set_queue_dying(ns->queue); + nvme_start_ns_queue(ns); + + set_capacity_and_notify(ns->disk, 0); +} + /** * nvme_kill_queues(): Ends all namespace queues * @ctrl: the dead controller that needs to end @@ -4485,7 +4592,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q)) - blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_start_admin_queue(ctrl); list_for_each_entry(ns, &ctrl->namespaces, list) nvme_set_queue_dying(ns); @@ -4548,7 +4655,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_quiesce_queue(ns->queue); + nvme_stop_ns_queue(ns); up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -4559,11 +4666,27 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_unquiesce_queue(ns->queue); + nvme_start_ns_queue(ns); up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_queues); +void nvme_stop_admin_queue(struct nvme_ctrl *ctrl) +{ + if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) + blk_mq_quiesce_queue(ctrl->admin_q); + else + blk_mq_wait_quiesce_done(ctrl->admin_q); +} +EXPORT_SYMBOL_GPL(nvme_stop_admin_queue); + +void nvme_start_admin_queue(struct nvme_ctrl *ctrl) +{ + if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) + blk_mq_unquiesce_queue(ctrl->admin_q); +} +EXPORT_SYMBOL_GPL(nvme_start_admin_queue); + void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; |
