From b9a1920837bc53430d339380e393a6e4c372939f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:39 +0100 Subject: block, cfq: remove delayed unlink Now that all cic's are immediately unlinked from both ioc and queue, lazy dropping from lookup path and trimming on elevator unregister are unnecessary. Kill them and remove now unused elevator_ops->trim(). This also leaves call_for_each_cic() without any user. Removed. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/elevator.h') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1d0f7a2ff73b..581dd1bd3d3e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -63,7 +63,6 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; - void (*trim)(struct io_context *); }; #define ELV_NAME_MAX (16) -- cgit v1.2.3 From b50b636bce6293fa858cc7ff6c3ffe4920d90006 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:39 +0100 Subject: block, cfq: kill ioc_gone Now that cic's are immediately unlinked under both locks, there's no need to count and drain cic's before module unload. RCU callback completion is waited with rcu_barrier(). While at it, remove residual RCU operations on cic_list. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 43 +++++-------------------------------------- include/linux/elevator.h | 17 ----------------- 2 files changed, 5 insertions(+), 55 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ff44435fad50..ae7791a8ded9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -62,10 +62,6 @@ static const int cfq_hist_divisor = 4; static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_ioc_pool; -static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); -static struct completion *ioc_gone; -static DEFINE_SPINLOCK(ioc_gone_lock); - #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) @@ -2671,26 +2667,8 @@ static void cfq_put_queue(struct cfq_queue *cfqq) static void cfq_cic_free_rcu(struct rcu_head *head) { - struct cfq_io_context *cic; - - cic = container_of(head, struct cfq_io_context, rcu_head); - - kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(cfq_ioc_count); - - if (ioc_gone) { - /* - * CFQ scheduler is exiting, grab exit lock and check - * the pending io context count. 
If it hits zero, - * complete ioc_gone and set it back to NULL - */ - spin_lock(&ioc_gone_lock); - if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { - complete(ioc_gone); - ioc_gone = NULL; - } - spin_unlock(&ioc_gone_lock); - } + kmem_cache_free(cfq_ioc_pool, + container_of(head, struct cfq_io_context, rcu_head)); } static void cfq_cic_free(struct cfq_io_context *cic) @@ -2705,7 +2683,7 @@ static void cfq_release_cic(struct cfq_io_context *cic) BUG_ON(!(dead_key & CIC_DEAD_KEY)); radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); - hlist_del_rcu(&cic->cic_list); + hlist_del(&cic->cic_list); cfq_cic_free(cic); } @@ -2782,7 +2760,6 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) INIT_HLIST_NODE(&cic->cic_list); cic->exit = cfq_exit_cic; cic->release = cfq_release_cic; - elv_ioc_count_inc(cfq_ioc_count); } return cic; @@ -3072,7 +3049,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask) ret = radix_tree_insert(&ioc->radix_root, q->id, cic); if (likely(!ret)) { - hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); + hlist_add_head(&cic->cic_list, &ioc->cic_list); list_add(&cic->queue_list, &cfqd->cic_list); cic = NULL; } else if (ret == -EEXIST) { @@ -4156,19 +4133,9 @@ static int __init cfq_init(void) static void __exit cfq_exit(void) { - DECLARE_COMPLETION_ONSTACK(all_gone); blkio_policy_unregister(&blkio_policy_cfq); elv_unregister(&iosched_cfq); - ioc_gone = &all_gone; - /* ioc_gone's update must be visible before reading ioc_count */ - smp_wmb(); - - /* - * this also protects us from entering cfq_slab_kill() with - * pending RCU callbacks - */ - if (elv_ioc_count_read(cfq_ioc_count)) - wait_for_completion(&all_gone); + rcu_barrier(); /* make sure all cic RCU frees are complete */ cfq_slab_kill(); } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 581dd1bd3d3e..02604c89ddde 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -196,22 +196,5 @@ enum { INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) -/* - * io context count accounting - */ -#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val) -#define elv_ioc_count_inc(name) this_cpu_inc(name) -#define elv_ioc_count_dec(name) this_cpu_dec(name) - -#define elv_ioc_count_read(name) \ -({ \ - unsigned long __val = 0; \ - int __cpu; \ - smp_wmb(); \ - for_each_possible_cpu(__cpu) \ - __val += per_cpu(name, __cpu); \ - __val; \ -}) - #endif /* CONFIG_BLOCK */ #endif -- cgit v1.2.3 From 22f746e235a5cbee2a6ca9887b1be2aa7d31fe71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:41 +0100 Subject: block: remove elevator_queue->ops elevator_queue->ops points to the same ops struct ->elevator_type.ops is pointing to. The only effect of caching it in elevator_queue is shorter notation - it doesn't save any indirect derefence. Relocate elevator_type->list which used only during module init/exit to the end of the structure, rename elevator_queue->elevator_type to ->type, and replace elevator_queue->ops with elevator_queue->type.ops. This doesn't introduce any functional difference. 
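(Illustration, not part of the patch: a minimal userspace sketch of the indirection argument above. The struct names mirror the kernel's, but the program itself is hypothetical; it only shows that reaching the ops through ->type costs the same one pointer load as the old cached ->ops field did, so the cache bought nothing but shorter notation.)

#include <stdio.h>

struct elevator_ops {
	int (*dispatch_fn)(int force);
};

struct elevator_type {
	struct elevator_ops ops;
	const char *elevator_name;
};

struct elevator_queue {
	struct elevator_type *type;	/* replaces the old ->ops cache */
};

static int noop_dispatch(int force)
{
	return !force;
}

int main(void)
{
	struct elevator_type et = {
		.ops = { .dispatch_fn = noop_dispatch },
		.elevator_name = "noop",
	};
	struct elevator_queue eq = { .type = &et };

	/* old: eq->ops->dispatch_fn(0)      -- load ->ops, then call
	 * new: eq->type->ops.dispatch_fn(0) -- load ->type, then call;
	 * ops sits at a constant offset, so it is the same two loads */
	printf("%s dispatch -> %d\n", eq.type->elevator_name,
	       eq.type->ops.dispatch_fn(0));
	return 0;
}
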
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk.h | 10 +++---- block/elevator.c | 74 +++++++++++++++++++++++------------------------- include/linux/elevator.h | 5 ++-- 3 files changed, 43 insertions(+), 46 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/blk.h b/block/blk.h index 5bca2668e1bf..4943770e0792 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,7 +94,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) return NULL; } if (unlikely(blk_queue_dead(q)) || - !q->elevator->ops->elevator_dispatch_fn(q, 0)) + !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) return NULL; } } @@ -103,16 +103,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_activate_req_fn) - e->ops->elevator_activate_req_fn(q, rq); + if (e->type->ops.elevator_activate_req_fn) + e->type->ops.elevator_activate_req_fn(q, rq); } static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_deactivate_req_fn) - e->ops->elevator_deactivate_req_fn(q, rq); + if (e->type->ops.elevator_deactivate_req_fn) + e->type->ops.elevator_deactivate_req_fn(q, rq); } #ifdef CONFIG_FAIL_IO_TIMEOUT diff --git a/block/elevator.c b/block/elevator.c index a16c2d1713e5..31ffe76aed3d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -61,8 +61,8 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->ops->elevator_allow_merge_fn) - return e->ops->elevator_allow_merge_fn(q, rq, bio); + if (e->type->ops.elevator_allow_merge_fn) + return e->type->ops.elevator_allow_merge_fn(q, rq, bio); return 1; } @@ -171,7 +171,7 @@ static struct elevator_type *elevator_get(const char *name) static int elevator_init_queue(struct request_queue *q, struct elevator_queue *eq) { - eq->elevator_data = eq->ops->elevator_init_fn(q); + eq->elevator_data = eq->type->ops.elevator_init_fn(q); if (eq->elevator_data) return 0; return -ENOMEM; @@ -203,8 +203,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, if (unlikely(!eq)) goto err; - eq->ops = &e->ops; - eq->elevator_type = e; + eq->type = e; kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); @@ -228,7 +227,7 @@ static void elevator_release(struct kobject *kobj) struct elevator_queue *e; e = container_of(kobj, struct elevator_queue, kobj); - elevator_put(e->elevator_type); + elevator_put(e->type); kfree(e->hash); kfree(e); } @@ -288,9 +287,8 @@ EXPORT_SYMBOL(elevator_init); void elevator_exit(struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); - if (e->ops->elevator_exit_fn) - e->ops->elevator_exit_fn(e); - e->ops = NULL; + if (e->type->ops.elevator_exit_fn) + e->type->ops.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); kobject_put(&e->kobj); @@ -500,8 +498,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) return ELEVATOR_BACK_MERGE; } - if (e->ops->elevator_merge_fn) - return e->ops->elevator_merge_fn(q, req, bio); + if (e->type->ops.elevator_merge_fn) + return e->type->ops.elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } @@ -544,8 +542,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_merged_fn) - e->ops->elevator_merged_fn(q, rq, type); + if (e->type->ops.elevator_merged_fn) + 
e->type->ops.elevator_merged_fn(q, rq, type); if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); @@ -559,8 +557,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct elevator_queue *e = q->elevator; const int next_sorted = next->cmd_flags & REQ_SORTED; - if (next_sorted && e->ops->elevator_merge_req_fn) - e->ops->elevator_merge_req_fn(q, rq, next); + if (next_sorted && e->type->ops.elevator_merge_req_fn) + e->type->ops.elevator_merge_req_fn(q, rq, next); elv_rqhash_reposition(q, rq); @@ -577,8 +575,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_bio_merged_fn) - e->ops->elevator_bio_merged_fn(q, rq, bio); + if (e->type->ops.elevator_bio_merged_fn) + e->type->ops.elevator_bio_merged_fn(q, rq, bio); } void elv_requeue_request(struct request_queue *q, struct request *rq) @@ -604,12 +602,12 @@ void elv_drain_elevator(struct request_queue *q) lockdep_assert_held(q->queue_lock); - while (q->elevator->ops->elevator_dispatch_fn(q, 1)) + while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) ; if (q->nr_sorted && printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this\n", - q->elevator->elevator_type->elevator_name, q->nr_sorted); + q->elevator->type->elevator_name, q->nr_sorted); } } @@ -698,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * rq cannot be accessed after calling * elevator_add_req_fn. */ - q->elevator->ops->elevator_add_req_fn(q, rq); + q->elevator->type->ops.elevator_add_req_fn(q, rq); break; case ELEVATOR_INSERT_FLUSH: @@ -727,8 +725,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_latter_req_fn) - return e->ops->elevator_latter_req_fn(q, rq); + if (e->type->ops.elevator_latter_req_fn) + return e->type->ops.elevator_latter_req_fn(q, rq); return NULL; } @@ -736,8 +734,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_former_req_fn) - return e->ops->elevator_former_req_fn(q, rq); + if (e->type->ops.elevator_former_req_fn) + return e->type->ops.elevator_former_req_fn(q, rq); return NULL; } @@ -745,8 +743,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_set_req_fn) - return e->ops->elevator_set_req_fn(q, rq, gfp_mask); + if (e->type->ops.elevator_set_req_fn) + return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private[0] = NULL; return 0; @@ -756,16 +754,16 @@ void elv_put_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_put_req_fn) - e->ops->elevator_put_req_fn(rq); + if (e->type->ops.elevator_put_req_fn) + e->type->ops.elevator_put_req_fn(rq); } int elv_may_queue(struct request_queue *q, int rw) { struct elevator_queue *e = q->elevator; - if (e->ops->elevator_may_queue_fn) - return e->ops->elevator_may_queue_fn(q, rw); + if (e->type->ops.elevator_may_queue_fn) + return e->type->ops.elevator_may_queue_fn(q, rw); return ELV_MQUEUE_MAY; } @@ -800,8 +798,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq) if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; if ((rq->cmd_flags & REQ_SORTED) && - e->ops->elevator_completed_req_fn) - 
e->ops->elevator_completed_req_fn(q, rq); + e->type->ops.elevator_completed_req_fn) + e->type->ops.elevator_completed_req_fn(q, rq); } } @@ -819,7 +817,7 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) e = container_of(kobj, struct elevator_queue, kobj); mutex_lock(&e->sysfs_lock); - error = e->ops ? entry->show(e, page) : -ENOENT; + error = e->type ? entry->show(e, page) : -ENOENT; mutex_unlock(&e->sysfs_lock); return error; } @@ -837,7 +835,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, e = container_of(kobj, struct elevator_queue, kobj); mutex_lock(&e->sysfs_lock); - error = e->ops ? entry->store(e, page, length) : -ENOENT; + error = e->type ? entry->store(e, page, length) : -ENOENT; mutex_unlock(&e->sysfs_lock); return error; } @@ -858,7 +856,7 @@ int __elv_register_queue(struct request_queue *q, struct elevator_queue *e) error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); if (!error) { - struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; + struct elv_fs_entry *attr = e->type->elevator_attrs; if (attr) { while (attr->attr.name) { if (sysfs_create_file(&e->kobj, &attr->attr)) @@ -959,7 +957,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) elevator_exit(old_elevator); elv_quiesce_end(q); - blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); + blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name); return 0; @@ -993,7 +991,7 @@ int elevator_change(struct request_queue *q, const char *name) return -EINVAL; } - if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { + if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { elevator_put(e); return 0; } @@ -1028,7 +1026,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) if (!q->elevator || !blk_queue_stackable(q)) return sprintf(name, "none\n"); - elv = e->elevator_type; + elv = e->type; spin_lock(&elv_list_lock); list_for_each_entry(__e, &elv_list, list) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 02604c89ddde..04958ef53e62 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -78,11 +78,11 @@ struct elv_fs_entry { */ struct elevator_type { - struct list_head list; struct elevator_ops ops; struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; + struct list_head list; }; /* @@ -90,10 +90,9 @@ struct elevator_type */ struct elevator_queue { - struct elevator_ops *ops; + struct elevator_type *type; void *elevator_data; struct kobject kobj; - struct elevator_type *elevator_type; struct mutex sysfs_lock; struct hlist_head *hash; unsigned int registered:1; -- cgit v1.2.3 From 3d3c2379feb177a5fd55bb0ed76776dc9d4f3243 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:42 +0100 Subject: block, cfq: move icq cache management to block core Let elevators set ->icq_size and ->icq_align in elevator_type and elv_register() and elv_unregister() respectively create and destroy kmem_cache for icq. * elv_register() now can return failure. All callers updated. * icq caches are automatically named "ELVNAME_io_cq". * cfq_slab_setup/kill() are collapsed into cfq_init/exit(). * While at it, minor indentation change for iosched_cfq.elevator_name for consistency. This will help moving icq management to block core. This doesn't introduce any functional change. 
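(Illustration, not part of the patch: a simplified userspace sketch of the opt-in and validation flow elv_register() gains here. kmem_cache_create() is stubbed out and the array sizes are approximations; only the size/align sanity checks and the "ELVNAME_io_cq" cache naming follow the patch.)

#include <stdio.h>
#include <stddef.h>

struct io_cq { void *q; void *ioc; };	/* stand-in for the real struct */

struct elevator_type {
	size_t icq_size;
	size_t icq_align;
	char elevator_name[16];
	char icq_cache_name[16 + 6];	/* elvname + "_io_cq" */
};

static int elv_register(struct elevator_type *e)
{
	/* create icq cache only if the elevator opted in */
	if (e->icq_size) {
		if (e->icq_size < sizeof(struct io_cq) ||
		    e->icq_align < __alignof__(struct io_cq))
			return -1;	/* -EINVAL in the kernel */
		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		/* kernel: e->icq_cache = kmem_cache_create(name, size,
		 *                                          align, 0, NULL); */
	}
	return 0;
}

int main(void)
{
	struct elevator_type cfq = {
		.icq_size  = sizeof(struct io_cq) + 2 * sizeof(long),
		.icq_align = __alignof__(struct io_cq),
		.elevator_name = "cfq",
	};

	if (elv_register(&cfq) == 0)
		printf("icq cache name: %s\n", cfq.icq_cache_name);
	return 0;
}
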
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 48 +++++++++++++++--------------------------------- block/deadline-iosched.c | 4 +--- block/elevator.c | 37 +++++++++++++++++++++++++++++++++++-- block/noop-iosched.c | 4 +--- include/linux/elevator.h | 11 ++++++++++- 5 files changed, 62 insertions(+), 42 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 048fa699adf9..06e59abcb57f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3914,34 +3914,6 @@ static void *cfq_init_queue(struct request_queue *q) return cfqd; } -static void cfq_slab_kill(void) -{ - /* - * Caller already ensured that pending RCU callbacks are completed, - * so we should have no busy allocations at this point. - */ - if (cfq_pool) - kmem_cache_destroy(cfq_pool); - if (cfq_icq_pool) - kmem_cache_destroy(cfq_icq_pool); -} - -static int __init cfq_slab_setup(void) -{ - cfq_pool = KMEM_CACHE(cfq_queue, 0); - if (!cfq_pool) - goto fail; - - cfq_icq_pool = KMEM_CACHE(cfq_io_cq, 0); - if (!cfq_icq_pool) - goto fail; - - return 0; -fail: - cfq_slab_kill(); - return -ENOMEM; -} - /* * sysfs parts below --> */ @@ -4053,8 +4025,10 @@ static struct elevator_type iosched_cfq = { .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, }, + .icq_size = sizeof(struct cfq_io_cq), + .icq_align = __alignof__(struct cfq_io_cq), .elevator_attrs = cfq_attrs, - .elevator_name = "cfq", + .elevator_name = "cfq", .elevator_owner = THIS_MODULE, }; @@ -4072,6 +4046,8 @@ static struct blkio_policy_type blkio_policy_cfq; static int __init cfq_init(void) { + int ret; + /* * could be 0 on HZ < 1000 setups */ @@ -4086,10 +4062,17 @@ static int __init cfq_init(void) #else cfq_group_idle = 0; #endif - if (cfq_slab_setup()) + cfq_pool = KMEM_CACHE(cfq_queue, 0); + if (!cfq_pool) return -ENOMEM; - elv_register(&iosched_cfq); + ret = elv_register(&iosched_cfq); + if (ret) { + kmem_cache_destroy(cfq_pool); + return ret; + } + cfq_icq_pool = iosched_cfq.icq_cache; + blkio_policy_register(&blkio_policy_cfq); return 0; @@ -4099,8 +4082,7 @@ static void __exit cfq_exit(void) { blkio_policy_unregister(&blkio_policy_cfq); elv_unregister(&iosched_cfq); - rcu_barrier(); /* make sure all cic RCU frees are complete */ - cfq_slab_kill(); + kmem_cache_destroy(cfq_pool); } module_init(cfq_init); diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c644137d9cd6..7bf12d793fcd 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -448,9 +448,7 @@ static struct elevator_type iosched_deadline = { static int __init deadline_init(void) { - elv_register(&iosched_deadline); - - return 0; + return elv_register(&iosched_deadline); } static void __exit deadline_exit(void) diff --git a/block/elevator.c b/block/elevator.c index c5c6214829cb..cca049fb45c8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -886,15 +886,36 @@ void elv_unregister_queue(struct request_queue *q) } EXPORT_SYMBOL(elv_unregister_queue); -void elv_register(struct elevator_type *e) +int elv_register(struct elevator_type *e) { char *def = ""; + /* create icq_cache if requested */ + if (e->icq_size) { + if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || + WARN_ON(e->icq_align < __alignof__(struct io_cq))) + return -EINVAL; + + snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), + "%s_io_cq", e->elevator_name); + e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size, + e->icq_align, 0, NULL); + if (!e->icq_cache) + return -ENOMEM; + } + + /* 
register, don't allow duplicate names */ spin_lock(&elv_list_lock); - BUG_ON(elevator_find(e->elevator_name)); + if (elevator_find(e->elevator_name)) { + spin_unlock(&elv_list_lock); + if (e->icq_cache) + kmem_cache_destroy(e->icq_cache); + return -EBUSY; + } list_add_tail(&e->list, &elv_list); spin_unlock(&elv_list_lock); + /* print pretty message */ if (!strcmp(e->elevator_name, chosen_elevator) || (!*chosen_elevator && !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) @@ -902,14 +923,26 @@ void elv_register(struct elevator_type *e) printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def); + return 0; } EXPORT_SYMBOL_GPL(elv_register); void elv_unregister(struct elevator_type *e) { + /* unregister */ spin_lock(&elv_list_lock); list_del_init(&e->list); spin_unlock(&elv_list_lock); + + /* + * Destroy icq_cache if it exists. icq's are RCU managed. Make + * sure all RCU operations are complete before proceeding. + */ + if (e->icq_cache) { + rcu_barrier(); + kmem_cache_destroy(e->icq_cache); + e->icq_cache = NULL; + } } EXPORT_SYMBOL_GPL(elv_unregister); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 06389e9ef96d..413a0b1d788c 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -94,9 +94,7 @@ static struct elevator_type elevator_noop = { static int __init noop_init(void) { - elv_register(&elevator_noop); - - return 0; + return elv_register(&elevator_noop); } static void __exit noop_exit(void) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 04958ef53e62..d3d3e28cbfd4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -78,10 +78,19 @@ struct elv_fs_entry { */ struct elevator_type { + /* managed by elevator core */ + struct kmem_cache *icq_cache; + + /* fields provided by elevator implementation */ struct elevator_ops ops; + size_t icq_size; + size_t icq_align; struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; + + /* managed by elevator core */ + char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ struct list_head list; }; @@ -127,7 +136,7 @@ extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void elv_register(struct elevator_type *); +extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); /* -- cgit v1.2.3 From 7e5a8794492e43e9eebb68a98a23be055888ccd0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:42 +0100 Subject: block, cfq: move io_cq exit/release to blk-ioc.c With kmem_cache managed by blk-ioc, io_cq exit/release can be moved to blk-ioc too. The odd ->io_cq->exit/release() callbacks are replaced with elevator_ops->elevator_exit_icq_fn() with unlinking from both ioc and q, and freeing automatically handled by blk-ioc. The elevator operation only need to perform exit operation specific to the elevator - in cfq's case, exiting the cfqq's. Also, clearing of io_cq's on q detach is moved to block core and automatically performed on elevator switch and q release. Because the q io_cq points to might be freed before RCU callback for the io_cq runs, blk-ioc code should remember to which cache the io_cq needs to be freed when the io_cq is released. New field io_cq->__rcu_icq_cache is added for this purpose. As both the new field and rcu_head are used only after io_cq is released and the q/ioc_node fields aren't, they are put into unions. 
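(Illustration, not part of the patch: a userspace sketch of the union layout described above, with simplified stand-ins for the kernel list and RCU types. Fields that are live only while the icq is linked share storage with fields that are live only after release, so recording the icq_cache for the RCU free path adds no size to io_cq.)

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
struct hlist_node { struct hlist_node *next, **pprev; };
struct rcu_head { struct rcu_head *next; void (*func)(struct rcu_head *); };

struct io_cq {
	void *q;
	void *ioc;

	union {
		struct list_head q_node;	/* live while linked to q */
		void *__rcu_icq_cache;		/* live after release */
	};
	union {
		struct hlist_node ioc_node;	/* live while linked to ioc */
		struct rcu_head __rcu_head;	/* live after release */
	};
};

int main(void)
{
	printf("with unions:  %zu bytes\n", sizeof(struct io_cq));
	printf("fields alone: %zu bytes\n",
	       2 * sizeof(void *) + sizeof(struct list_head) +
	       sizeof(void *) + sizeof(struct hlist_node) +
	       sizeof(struct rcu_head));
	return 0;
}
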
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-ioc.c | 76 ++++++++++++++++++++++++++++++++++++++++++----- block/blk-sysfs.c | 6 +++- block/blk.h | 1 + block/cfq-iosched.c | 47 ++--------------------------- block/elevator.c | 3 +- include/linux/elevator.h | 5 ++++ include/linux/iocontext.h | 20 +++++++++---- 7 files changed, 97 insertions(+), 61 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 87ecc98b8ade..0910a5584d38 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -44,6 +44,51 @@ EXPORT_SYMBOL(get_io_context); #define ioc_release_depth_dec(q) do { } while (0) #endif +static void icq_free_icq_rcu(struct rcu_head *head) +{ + struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); + + kmem_cache_free(icq->__rcu_icq_cache, icq); +} + +/* + * Exit and free an icq. Called with both ioc and q locked. + */ +static void ioc_exit_icq(struct io_cq *icq) +{ + struct io_context *ioc = icq->ioc; + struct request_queue *q = icq->q; + struct elevator_type *et = q->elevator->type; + + lockdep_assert_held(&ioc->lock); + lockdep_assert_held(q->queue_lock); + + radix_tree_delete(&ioc->icq_tree, icq->q->id); + hlist_del_init(&icq->ioc_node); + list_del_init(&icq->q_node); + + /* + * Both setting lookup hint to and clearing it from @icq are done + * under queue_lock. If it's not pointing to @icq now, it never + * will. Hint assignment itself can race safely. + */ + if (rcu_dereference_raw(ioc->icq_hint) == icq) + rcu_assign_pointer(ioc->icq_hint, NULL); + + if (et->ops.elevator_exit_icq_fn) { + ioc_release_depth_inc(q); + et->ops.elevator_exit_icq_fn(icq); + ioc_release_depth_dec(q); + } + + /* + * @icq->q might have gone away by the time RCU callback runs + * making it impossible to determine icq_cache. Record it in @icq. + */ + icq->__rcu_icq_cache = et->icq_cache; + call_rcu(&icq->__rcu_head, icq_free_icq_rcu); +} + /* * Slow path for ioc release in put_io_context(). Performs double-lock * dancing to unlink all icq's and then frees ioc. @@ -87,10 +132,7 @@ static void ioc_release_fn(struct work_struct *work) spin_lock(&ioc->lock); continue; } - ioc_release_depth_inc(this_q); - icq->exit(icq); - icq->release(icq); - ioc_release_depth_dec(this_q); + ioc_exit_icq(icq); } if (last_q) { @@ -167,10 +209,7 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q) last_q = this_q; continue; } - ioc_release_depth_inc(this_q); - icq->exit(icq); - icq->release(icq); - ioc_release_depth_dec(this_q); + ioc_exit_icq(icq); } if (last_q && last_q != locked_q) @@ -203,6 +242,27 @@ void exit_io_context(struct task_struct *task) put_io_context(ioc, NULL); } +/** + * ioc_clear_queue - break any ioc association with the specified queue + * @q: request_queue being cleared + * + * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. 
+ */ +void ioc_clear_queue(struct request_queue *q) +{ + lockdep_assert_held(q->queue_lock); + + while (!list_empty(&q->icq_list)) { + struct io_cq *icq = list_entry(q->icq_list.next, + struct io_cq, q_node); + struct io_context *ioc = icq->ioc; + + spin_lock(&ioc->lock); + ioc_exit_icq(icq); + spin_unlock(&ioc->lock); + } +} + void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags, int node) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5b4b4ab5e785..cf150011d808 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -479,8 +479,12 @@ static void blk_release_queue(struct kobject *kobj) blk_sync_queue(q); - if (q->elevator) + if (q->elevator) { + spin_lock_irq(q->queue_lock); + ioc_clear_queue(q); + spin_unlock_irq(q->queue_lock); elevator_exit(q->elevator); + } blk_throtl_exit(q); diff --git a/block/blk.h b/block/blk.h index 3c510a4b5054..ed4d9bf2ab16 100644 --- a/block/blk.h +++ b/block/blk.h @@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq) */ void get_io_context(struct io_context *ioc); struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q); +void ioc_clear_queue(struct request_queue *q); void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask, int node); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 06e59abcb57f..f6d315551496 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2674,26 +2674,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) cfq_put_cfqg(cfqg); } -static void cfq_icq_free_rcu(struct rcu_head *head) -{ - kmem_cache_free(cfq_icq_pool, - icq_to_cic(container_of(head, struct io_cq, rcu_head))); -} - -static void cfq_icq_free(struct io_cq *icq) -{ - call_rcu(&icq->rcu_head, cfq_icq_free_rcu); -} - -static void cfq_release_icq(struct io_cq *icq) -{ - struct io_context *ioc = icq->ioc; - - radix_tree_delete(&ioc->icq_tree, icq->q->id); - hlist_del(&icq->ioc_node); - cfq_icq_free(icq); -} - static void cfq_put_cooperator(struct cfq_queue *cfqq) { struct cfq_queue *__cfqq, *next; @@ -2731,17 +2711,6 @@ static void cfq_exit_icq(struct io_cq *icq) { struct cfq_io_cq *cic = icq_to_cic(icq); struct cfq_data *cfqd = cic_to_cfqd(cic); - struct io_context *ioc = icq->ioc; - - list_del_init(&icq->q_node); - - /* - * Both setting lookup hint to and clearing it from @icq are done - * under queue_lock. If it's not pointing to @icq now, it never - * will. Hint assignment itself can race safely. 
- */ - if (rcu_dereference_raw(ioc->icq_hint) == icq) - rcu_assign_pointer(ioc->icq_hint, NULL); if (cic->cfqq[BLK_RW_ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); @@ -2764,8 +2733,6 @@ static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask) cic->ttime.last_end_request = jiffies; INIT_LIST_HEAD(&cic->icq.q_node); INIT_HLIST_NODE(&cic->icq.ioc_node); - cic->icq.exit = cfq_exit_icq; - cic->icq.release = cfq_release_icq; } return cic; @@ -3034,7 +3001,7 @@ out: if (ret) printk(KERN_ERR "cfq: icq link failed!\n"); if (icq) - cfq_icq_free(icq); + kmem_cache_free(cfq_icq_pool, icq); return ret; } @@ -3774,17 +3741,6 @@ static void cfq_exit_queue(struct elevator_queue *e) if (cfqd->active_queue) __cfq_slice_expired(cfqd, cfqd->active_queue, 0); - while (!list_empty(&q->icq_list)) { - struct io_cq *icq = list_entry(q->icq_list.next, - struct io_cq, q_node); - struct io_context *ioc = icq->ioc; - - spin_lock(&ioc->lock); - cfq_exit_icq(icq); - cfq_release_icq(icq); - spin_unlock(&ioc->lock); - } - cfq_put_async_queues(cfqd); cfq_release_cfq_groups(cfqd); @@ -4019,6 +3975,7 @@ static struct elevator_type iosched_cfq = { .elevator_completed_req_fn = cfq_completed_request, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_exit_icq_fn = cfq_exit_icq, .elevator_set_req_fn = cfq_set_request, .elevator_put_req_fn = cfq_put_request, .elevator_may_queue_fn = cfq_may_queue, diff --git a/block/elevator.c b/block/elevator.c index cca049fb45c8..91e18f8af9be 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -979,8 +979,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) goto fail_register; } - /* done, replace the old one with new one and turn off BYPASS */ + /* done, clear io_cq's, switch elevators and turn off BYPASS */ spin_lock_irq(q->queue_lock); + ioc_clear_queue(q); old_elevator = q->elevator; q->elevator = e; spin_unlock_irq(q->queue_lock); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d3d3e28cbfd4..06e4dd568717 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -5,6 +5,8 @@ #ifdef CONFIG_BLOCK +struct io_cq; + typedef int (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); @@ -24,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); +typedef void (elevator_exit_icq_fn) (struct io_cq *); typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t); typedef void (elevator_put_req_fn) (struct request *); typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); @@ -56,6 +59,8 @@ struct elevator_ops elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; + elevator_exit_icq_fn *elevator_exit_icq_fn; + elevator_set_req_fn *elevator_set_req_fn; elevator_put_req_fn *elevator_put_req_fn; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index d15ca6591f96..ac390a34c0e7 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -14,14 +14,22 @@ struct io_cq { struct request_queue *q; struct io_context *ioc; - struct list_head q_node; - struct hlist_node ioc_node; + /* + * q_node and ioc_node link io_cq through icq_list of q and ioc + * respectively. 
Both fields are unused once ioc_exit_icq() is + * called and shared with __rcu_icq_cache and __rcu_head which are + * used for RCU free of io_cq. + */ + union { + struct list_head q_node; + struct kmem_cache *__rcu_icq_cache; + }; + union { + struct hlist_node ioc_node; + struct rcu_head __rcu_head; + }; unsigned long changed; - struct rcu_head rcu_head; - - void (*exit)(struct io_cq *); - void (*release)(struct io_cq *); }; /* -- cgit v1.2.3 From 9b84cacd013996f244d85b3d873287c2a8f88658 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:42 +0100 Subject: block, cfq: restructure io_cq creation path for io_context interface cleanup Add elevator_ops->elevator_init_icq_fn() and restructure cfq_create_cic() and rename it to ioc_create_icq(). The new function expects its caller to pass in io_context, uses elevator_type->icq_cache, handles generic init, calls the new elevator operation for elevator specific initialization, and returns pointer to created or looked up icq. This leaves cfq_icq_pool variable without any user. Removed. This prepares for io_context interface cleanup and doesn't introduce any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 94 +++++++++++++++++++++--------------------------- include/linux/elevator.h | 2 ++ 2 files changed, 43 insertions(+), 53 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f6d315551496..11f49d036845 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -59,7 +59,6 @@ static const int cfq_hist_divisor = 4; #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) static struct kmem_cache *cfq_pool; -static struct kmem_cache *cfq_icq_pool; #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) @@ -2707,6 +2706,13 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) cfq_put_queue(cfqq); } +static void cfq_init_icq(struct io_cq *icq) +{ + struct cfq_io_cq *cic = icq_to_cic(icq); + + cic->ttime.last_end_request = jiffies; +} + static void cfq_exit_icq(struct io_cq *icq) { struct cfq_io_cq *cic = icq_to_cic(icq); @@ -2723,21 +2729,6 @@ static void cfq_exit_icq(struct io_cq *icq) } } -static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask) -{ - struct cfq_io_cq *cic; - - cic = kmem_cache_alloc_node(cfq_icq_pool, gfp_mask | __GFP_ZERO, - cfqd->queue->node); - if (cic) { - cic->ttime.last_end_request = jiffies; - INIT_LIST_HEAD(&cic->icq.q_node); - INIT_HLIST_NODE(&cic->icq.ioc_node); - } - - return cic; -} - static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) { struct task_struct *tsk = current; @@ -2945,64 +2936,62 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, } /** - * cfq_create_cic - create and link a cfq_io_cq - * @cfqd: cfqd of interest + * ioc_create_icq - create and link io_cq + * @q: request_queue of interest * @gfp_mask: allocation mask * - * Make sure cfq_io_cq linking %current->io_context and @cfqd exists. If - * ioc and/or cic doesn't exist, they will be created using @gfp_mask. + * Make sure io_cq linking %current->io_context and @q exists. If either + * io_context and/or icq don't exist, they will be created using @gfp_mask. + * + * The caller is responsible for ensuring @ioc won't go away and @q is + * alive and will stay alive until this function returns. 
*/ -static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask) +static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) { - struct request_queue *q = cfqd->queue; - struct io_cq *icq = NULL; - struct cfq_io_cq *cic; + struct elevator_type *et = q->elevator->type; struct io_context *ioc; - int ret = -ENOMEM; - - might_sleep_if(gfp_mask & __GFP_WAIT); + struct io_cq *icq; /* allocate stuff */ ioc = create_io_context(current, gfp_mask, q->node); if (!ioc) - goto out; + return NULL; - cic = cfq_alloc_cic(cfqd, gfp_mask); - if (!cic) - goto out; - icq = &cic->icq; + icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, + q->node); + if (!icq) + return NULL; - ret = radix_tree_preload(gfp_mask); - if (ret) - goto out; + if (radix_tree_preload(gfp_mask) < 0) { + kmem_cache_free(et->icq_cache, icq); + return NULL; + } icq->ioc = ioc; - icq->q = cfqd->queue; + icq->q = q; + INIT_LIST_HEAD(&icq->q_node); + INIT_HLIST_NODE(&icq->ioc_node); /* lock both q and ioc and try to link @icq */ spin_lock_irq(q->queue_lock); spin_lock(&ioc->lock); - ret = radix_tree_insert(&ioc->icq_tree, q->id, icq); - if (likely(!ret)) { + if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - icq = NULL; - } else if (ret == -EEXIST) { - /* someone else already did it */ - ret = 0; + if (et->ops.elevator_init_icq_fn) + et->ops.elevator_init_icq_fn(icq); + } else { + kmem_cache_free(et->icq_cache, icq); + icq = ioc_lookup_icq(ioc, q); + if (!icq) + printk(KERN_ERR "cfq: icq link failed!\n"); } spin_unlock(&ioc->lock); spin_unlock_irq(q->queue_lock); - radix_tree_preload_end(); -out: - if (ret) - printk(KERN_ERR "cfq: icq link failed!\n"); - if (icq) - kmem_cache_free(cfq_icq_pool, icq); - return ret; + return icq; } /** @@ -3022,7 +3011,6 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask) struct request_queue *q = cfqd->queue; struct cfq_io_cq *cic = NULL; struct io_context *ioc; - int err; lockdep_assert_held(q->queue_lock); @@ -3037,9 +3025,9 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask) /* slow path - unlock, create missing ones and retry */ spin_unlock_irq(q->queue_lock); - err = cfq_create_cic(cfqd, gfp_mask); + cic = icq_to_cic(ioc_create_icq(q, gfp_mask)); spin_lock_irq(q->queue_lock); - if (err) + if (!cic) return NULL; } @@ -3975,6 +3963,7 @@ static struct elevator_type iosched_cfq = { .elevator_completed_req_fn = cfq_completed_request, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_init_icq_fn = cfq_init_icq, .elevator_exit_icq_fn = cfq_exit_icq, .elevator_set_req_fn = cfq_set_request, .elevator_put_req_fn = cfq_put_request, @@ -4028,7 +4017,6 @@ static int __init cfq_init(void) kmem_cache_destroy(cfq_pool); return ret; } - cfq_icq_pool = iosched_cfq.icq_cache; blkio_policy_register(&blkio_policy_cfq); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 06e4dd568717..c8f1e67a8ebe 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -26,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); +typedef void (elevator_init_icq_fn) (struct io_cq *); typedef void (elevator_exit_icq_fn) (struct io_cq *); typedef int (elevator_set_req_fn) (struct 
request_queue *, struct request *, gfp_t); typedef void (elevator_put_req_fn) (struct request *); @@ -59,6 +60,7 @@ struct elevator_ops elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; + elevator_init_icq_fn *elevator_init_icq_fn; elevator_exit_icq_fn *elevator_exit_icq_fn; elevator_set_req_fn *elevator_set_req_fn; -- cgit v1.2.3 From f1f8cc94651738b418ba54c039df536303b91704 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:42 +0100 Subject: block, cfq: move icq creation and rq->elv.icq association to block core Now block layer knows everything necessary to create and associate icq's with requests. Move ioc_create_icq() to blk-ioc.c and update get_request() such that, if elevator_type->icq_size is set, requests are automatically associated with their matching icq's before elv_set_request(). io_context reference is also managed by block core on request alloc/free. * Only ioprio/cgroup changed handling remains from cfq_get_cic(). Collapsed into cfq_set_request(). * This removes queue kicking on icq allocation failure (for now). As icq allocation failure is rare and the only effect of queue kicking achieved was possibily accelerating queue processing, this change shouldn't be noticeable. There is a larger underlying problem. Unlike request allocation, icq allocation is not guaranteed to succeed eventually after retries. The number of icq is unbound and thus mempool can't be the solution either. This effectively adds allocation dependency on memory free path and thus possibility of deadlock. This usually wouldn't happen because icq allocation is not a hot path and, even when the condition triggers, it's highly unlikely that none of the writeback workers already has icq. However, this is still possible especially if elevator is being switched under high memory pressure, so we better get it fixed. Probably the only solution is just bypassing elevator and appending to dispatch queue on any elevator allocation failure. * Comment added to explain how icq's are managed and synchronized. This completes cleanup of io_context interface. 
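(Illustration, not part of the patch: a userspace sketch of the lookup-then-create flow get_request() now follows, with a single pointer slot standing in for the ioc radix tree and a mutex for the queue lock. Lookup happens under the lock, allocation outside it, and a racing creator wins while the loser frees its copy and reuses the winner's icq, mirroring the collision handling in ioc_create_icq().)

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct icq { int q_id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct icq *slot;		/* stands in for ioc->icq_tree */

static struct icq *icq_lookup(void)
{
	return slot;			/* caller holds the lock */
}

static struct icq *icq_create(int q_id)
{
	struct icq *icq = calloc(1, sizeof(*icq));	/* outside the lock */

	if (!icq)
		return NULL;
	icq->q_id = q_id;

	pthread_mutex_lock(&lock);
	if (!slot) {
		slot = icq;		/* we linked it first */
	} else {
		free(icq);		/* lost the race: use the winner's */
		icq = icq_lookup();
	}
	pthread_mutex_unlock(&lock);
	return icq;
}

int main(void)
{
	struct icq *icq;

	pthread_mutex_lock(&lock);
	icq = icq_lookup();		/* fast path, under the lock */
	pthread_mutex_unlock(&lock);

	if (!icq)
		icq = icq_create(1);	/* slow path, may sleep in kernel */

	if (icq)
		printf("using icq for queue %d\n", icq->q_id);
	return 0;
}
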
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 46 +++++++++++++--- block/blk-ioc.c | 60 ++++++++++++++++++++- block/blk.h | 1 + block/cfq-iosched.c | 135 ++++------------------------------------------ include/linux/elevator.h | 8 +-- include/linux/iocontext.h | 59 ++++++++++++++++++++ 6 files changed, 173 insertions(+), 136 deletions(-) (limited to 'include/linux/elevator.h') diff --git a/block/blk-core.c b/block/blk-core.c index 3c26c7f48703..8fbdac7010bb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -640,13 +640,18 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { - if (rq->cmd_flags & REQ_ELVPRIV) + if (rq->cmd_flags & REQ_ELVPRIV) { elv_put_request(q, rq); + if (rq->elv.icq) + put_io_context(rq->elv.icq->ioc, q); + } + mempool_free(rq, q->rq.rq_pool); } static struct request * -blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, struct io_cq *icq, + unsigned int flags, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -657,10 +662,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) rq->cmd_flags = flags | REQ_ALLOCED; - if ((flags & REQ_ELVPRIV) && - unlikely(elv_set_request(q, rq, gfp_mask))) { - mempool_free(rq, q->rq.rq_pool); - return NULL; + if (flags & REQ_ELVPRIV) { + rq->elv.icq = icq; + if (unlikely(elv_set_request(q, rq, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; + } + /* @rq->elv.icq holds on to io_context until @rq is freed */ + if (icq) + get_io_context(icq->ioc); } return rq; @@ -772,11 +782,14 @@ static struct request *get_request(struct request_queue *q, int rw_flags, { struct request *rq = NULL; struct request_list *rl = &q->rq; + struct elevator_type *et; struct io_context *ioc; + struct io_cq *icq = NULL; const bool is_sync = rw_is_sync(rw_flags) != 0; bool retried = false; int may_queue; retry: + et = q->elevator->type; ioc = current->io_context; if (unlikely(blk_queue_dead(q))) @@ -837,17 +850,36 @@ retry: rl->count[is_sync]++; rl->starved[is_sync] = 0; + /* + * Decide whether the new request will be managed by elevator. If + * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will + * prevent the current elevator from being destroyed until the new + * request is freed. This guarantees icq's won't be destroyed and + * makes creating new ones safe. + * + * Also, lookup icq while holding queue_lock. If it doesn't exist, + * it will be created after releasing queue_lock. + */ if (blk_rq_should_init_elevator(bio) && !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { rw_flags |= REQ_ELVPRIV; rl->elvpriv++; + if (et->icq_cache && ioc) + icq = ioc_lookup_icq(ioc, q); } if (blk_queue_io_stat(q)) rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw_flags, gfp_mask); + /* create icq if missing */ + if (unlikely(et->icq_cache && !icq)) + icq = ioc_create_icq(q, gfp_mask); + + /* rqs are guaranteed to have icq on elv_set_request() if requested */ + if (likely(!et->icq_cache || icq)) + rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); + if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. 
Undo anything diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 0910a5584d38..c04d16b02225 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -289,7 +289,6 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags, kmem_cache_free(iocontext_cachep, ioc); task_unlock(task); } -EXPORT_SYMBOL(create_io_context_slowpath); /** * get_task_io_context - get io_context of a task @@ -362,6 +361,65 @@ out: } EXPORT_SYMBOL(ioc_lookup_icq); +/** + * ioc_create_icq - create and link io_cq + * @q: request_queue of interest + * @gfp_mask: allocation mask + * + * Make sure io_cq linking %current->io_context and @q exists. If either + * io_context and/or icq don't exist, they will be created using @gfp_mask. + * + * The caller is responsible for ensuring @ioc won't go away and @q is + * alive and will stay alive until this function returns. + */ +struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) +{ + struct elevator_type *et = q->elevator->type; + struct io_context *ioc; + struct io_cq *icq; + + /* allocate stuff */ + ioc = create_io_context(current, gfp_mask, q->node); + if (!ioc) + return NULL; + + icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, + q->node); + if (!icq) + return NULL; + + if (radix_tree_preload(gfp_mask) < 0) { + kmem_cache_free(et->icq_cache, icq); + return NULL; + } + + icq->ioc = ioc; + icq->q = q; + INIT_LIST_HEAD(&icq->q_node); + INIT_HLIST_NODE(&icq->ioc_node); + + /* lock both q and ioc and try to link @icq */ + spin_lock_irq(q->queue_lock); + spin_lock(&ioc->lock); + + if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { + hlist_add_head(&icq->ioc_node, &ioc->icq_list); + list_add(&icq->q_node, &q->icq_list); + if (et->ops.elevator_init_icq_fn) + et->ops.elevator_init_icq_fn(icq); + } else { + kmem_cache_free(et->icq_cache, icq); + icq = ioc_lookup_icq(ioc, q); + if (!icq) + printk(KERN_ERR "cfq: icq link failed!\n"); + } + + spin_unlock(&ioc->lock); + spin_unlock_irq(q->queue_lock); + radix_tree_preload_end(); + return icq; +} + void ioc_set_changed(struct io_context *ioc, int which) { struct io_cq *icq; diff --git a/block/blk.h b/block/blk.h index ed4d9bf2ab16..7efd772336de 100644 --- a/block/blk.h +++ b/block/blk.h @@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq) */ void get_io_context(struct io_context *ioc); struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q); +struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask); void ioc_clear_queue(struct request_queue *q); void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask, diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 11f49d036845..f3b44c394e6d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2935,117 +2935,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, return cfqq; } -/** - * ioc_create_icq - create and link io_cq - * @q: request_queue of interest - * @gfp_mask: allocation mask - * - * Make sure io_cq linking %current->io_context and @q exists. If either - * io_context and/or icq don't exist, they will be created using @gfp_mask. - * - * The caller is responsible for ensuring @ioc won't go away and @q is - * alive and will stay alive until this function returns. 
- */ -static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) -{ - struct elevator_type *et = q->elevator->type; - struct io_context *ioc; - struct io_cq *icq; - - /* allocate stuff */ - ioc = create_io_context(current, gfp_mask, q->node); - if (!ioc) - return NULL; - - icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, - q->node); - if (!icq) - return NULL; - - if (radix_tree_preload(gfp_mask) < 0) { - kmem_cache_free(et->icq_cache, icq); - return NULL; - } - - icq->ioc = ioc; - icq->q = q; - INIT_LIST_HEAD(&icq->q_node); - INIT_HLIST_NODE(&icq->ioc_node); - - /* lock both q and ioc and try to link @icq */ - spin_lock_irq(q->queue_lock); - spin_lock(&ioc->lock); - - if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { - hlist_add_head(&icq->ioc_node, &ioc->icq_list); - list_add(&icq->q_node, &q->icq_list); - if (et->ops.elevator_init_icq_fn) - et->ops.elevator_init_icq_fn(icq); - } else { - kmem_cache_free(et->icq_cache, icq); - icq = ioc_lookup_icq(ioc, q); - if (!icq) - printk(KERN_ERR "cfq: icq link failed!\n"); - } - - spin_unlock(&ioc->lock); - spin_unlock_irq(q->queue_lock); - radix_tree_preload_end(); - return icq; -} - -/** - * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context - * @cfqd: cfqd to setup cic for - * @gfp_mask: allocation mask - * - * Return cfq_io_cq associating @cfqd and %current->io_context and - * bump refcnt on io_context. If ioc or cic doesn't exist, they're created - * using @gfp_mask. - * - * Must be called under queue_lock which may be released and re-acquired. - * This function also may sleep depending on @gfp_mask. - */ -static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask) -{ - struct request_queue *q = cfqd->queue; - struct cfq_io_cq *cic = NULL; - struct io_context *ioc; - - lockdep_assert_held(q->queue_lock); - - while (true) { - /* fast path */ - ioc = current->io_context; - if (likely(ioc)) { - cic = cfq_cic_lookup(cfqd, ioc); - if (likely(cic)) - break; - } - - /* slow path - unlock, create missing ones and retry */ - spin_unlock_irq(q->queue_lock); - cic = icq_to_cic(ioc_create_icq(q, gfp_mask)); - spin_lock_irq(q->queue_lock); - if (!cic) - return NULL; - } - - /* bump @ioc's refcnt and handle changed notifications */ - get_io_context(ioc); - - if (unlikely(cic->icq.changed)) { - if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed)) - changed_ioprio(cic); -#ifdef CONFIG_CFQ_GROUP_IOSCHED - if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed)) - changed_cgroup(cic); -#endif - } - - return cic; -} - static void __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) { @@ -3524,8 +3413,6 @@ static void cfq_put_request(struct request *rq) BUG_ON(!cfqq->allocated[rw]); cfqq->allocated[rw]--; - put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue); - /* Put down rq reference on cfqg */ cfq_put_cfqg(RQ_CFQG(rq)); rq->elv.priv[0] = NULL; @@ -3574,7 +3461,7 @@ static int cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_io_cq *cic; + struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); const int rw = rq_data_dir(rq); const bool is_sync = rq_is_sync(rq); struct cfq_queue *cfqq; @@ -3582,9 +3469,16 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) might_sleep_if(gfp_mask & __GFP_WAIT); spin_lock_irq(q->queue_lock); - cic = cfq_get_cic(cfqd, gfp_mask); - if (!cic) - goto queue_fail; + + /* handle changed notifications */ + if 
(unlikely(cic->icq.changed)) { + if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed)) + changed_ioprio(cic); +#ifdef CONFIG_CFQ_GROUP_IOSCHED + if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed)) + changed_cgroup(cic); +#endif + } new_queue: cfqq = cic_to_cfqq(cic, is_sync); @@ -3615,17 +3509,10 @@ new_queue: cfqq->allocated[rw]++; cfqq->ref++; - rq->elv.icq = &cic->icq; rq->elv.priv[0] = cfqq; rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg); spin_unlock_irq(q->queue_lock); return 0; - -queue_fail: - cfq_schedule_dispatch(cfqd); - spin_unlock_irq(q->queue_lock); - cfq_log(cfqd, "set_request fail"); - return 1; } static void cfq_kick_queue(struct work_struct *work) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c8f1e67a8ebe..c24f3d7fbf1e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -60,8 +60,8 @@ struct elevator_ops elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; - elevator_init_icq_fn *elevator_init_icq_fn; - elevator_exit_icq_fn *elevator_exit_icq_fn; + elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ + elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ elevator_set_req_fn *elevator_set_req_fn; elevator_put_req_fn *elevator_put_req_fn; @@ -90,8 +90,8 @@ struct elevator_type /* fields provided by elevator implementation */ struct elevator_ops ops; - size_t icq_size; - size_t icq_align; + size_t icq_size; /* see iocontext.h */ + size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index ac390a34c0e7..7e1371c4bccf 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -10,6 +10,65 @@ enum { ICQ_CGROUP_CHANGED, }; +/* + * An io_cq (icq) is association between an io_context (ioc) and a + * request_queue (q). This is used by elevators which need to track + * information per ioc - q pair. + * + * Elevator can request use of icq by setting elevator_type->icq_size and + * ->icq_align. Both size and align must be larger than that of struct + * io_cq and elevator can use the tail area for private information. The + * recommended way to do this is defining a struct which contains io_cq as + * the first member followed by private members and using its size and + * align. For example, + * + * struct snail_io_cq { + * struct io_cq icq; + * int poke_snail; + * int feed_snail; + * }; + * + * struct elevator_type snail_elv_type { + * .ops = { ... }, + * .icq_size = sizeof(struct snail_io_cq), + * .icq_align = __alignof__(struct snail_io_cq), + * ... + * }; + * + * If icq_size is set, block core will manage icq's. All requests will + * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn() + * is called and be holding a reference to the associated io_context. + * + * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is + * called and, on destruction, ->elevator_exit_icq_fn(). Both functions + * are called with both the associated io_context and queue locks held. + * + * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding + * queue lock but the returned icq is valid only until the queue lock is + * released. Elevators can not and should not try to create or destroy + * icq's. + * + * As icq's are linked from both ioc and q, the locking rules are a bit + * complex. + * + * - ioc lock nests inside q lock. 
+ * + * - ioc->icq_list and icq->ioc_node are protected by ioc lock. + * q->icq_list and icq->q_node by q lock. + * + * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq + * itself is protected by q lock. However, both the indexes and icq + * itself are also RCU managed and lookup can be performed holding only + * the q lock. + * + * - icq's are not reference counted. They are destroyed when either the + * ioc or q goes away. Each request with icq set holds an extra + * reference to ioc to ensure it stays until the request is completed. + * + * - Linking and unlinking icq's are performed while holding both ioc and q + * locks. Due to the lock ordering, q exit is simple but ioc exit + * requires reverse-order double lock dance. + */ struct io_cq { struct request_queue *q; struct io_context *ioc; -- cgit v1.2.3