| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-10-18 20:50:22 -0700 |
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-10-18 20:50:22 -0700 |
| commit | 098fc560ef2bbd1bde80845c898fa95db616eb6c (patch) | |
| tree | ca722c6fdbdffe9b7cfd31d61e8f4aae906a319c /drivers/block | |
| parent | bffe01870598b7a0a77073e25ee94e026bc98e6b (diff) | |
| parent | 2a136606fe21b603a0ce484fc578f862f8e8384d (diff) | |
Trivial Makefile merge
Diffstat (limited to 'drivers/block')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | drivers/block/Kconfig | 33 |
| -rw-r--r-- | drivers/block/Kconfig.iosched | 8 |
| -rw-r--r-- | drivers/block/Makefile | 1 |
| -rw-r--r-- | drivers/block/as-iosched.c | 122 |
| -rw-r--r-- | drivers/block/cfq-iosched.c | 1574 |
| -rw-r--r-- | drivers/block/cpqarray.c | 14 |
| -rw-r--r-- | drivers/block/deadline-iosched.c | 136 |
| -rw-r--r-- | drivers/block/elevator.c | 318 |
| -rw-r--r-- | drivers/block/ll_rw_blk.c | 253 |
| -rw-r--r-- | drivers/block/noop-iosched.c | 33 |
| -rw-r--r-- | drivers/block/pktcdvd.c | 2679 |
| -rw-r--r-- | drivers/block/ub.c | 165 |
12 files changed, 4735 insertions, 601 deletions
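Beyond the new pktcdvd driver, most of the churn above comes from converting the I/O schedulers from statically linked `elevator_t` instances into loadable `struct elevator_type` modules registered through `elv_register()`, which is also why the Kconfig entries below switch from `bool` to `tristate`. As a rough orientation before the raw diff, the sketch below condenses that registration pattern; it is an illustrative skeleton of the 2.6-era API as it appears in the anticipatory and CFQ hunks of this merge, not code taken from the patch, and the "example" name and empty ops table are placeholders.

```c
/*
 * Minimal sketch (not from the patch) of the module-registration pattern
 * this merge moves the schedulers to.  A real scheduler fills in the
 * elevator_*_fn hooks in .ops, as the anticipatory and CFQ hunks do.
 */
#include <linux/module.h>
#include <linux/elevator.h>

static struct elevator_type iosched_example = {
	.ops = {
		/* .elevator_merge_fn, .elevator_init_fn, ... go here */
	},
	.elevator_name	= "example",	/* placeholder name */
	.elevator_owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	/* makes the scheduler known to the block layer by name */
	return elv_register(&iosched_example);
}

static void __exit example_exit(void)
{
	elv_unregister(&iosched_example);
}

module_init(example_init);
module_exit(example_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("skeleton I/O scheduler registration example");
```

With this pattern in place, AS, deadline, CFQ and noop can be built as modules and selected per queue at runtime instead of being compiled-in singletons, which is what the diff below implements.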
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a1d50242b8cd..6a43c807497d 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -356,6 +356,39 @@ config LBD
 	  your machine, or if you want to have a raid or loopback device
 	  bigger than 2TB. Otherwise say N.
 
+config CDROM_PKTCDVD
+	tristate "Packet writing on CD/DVD media"
+	help
+	  If you have a CDROM drive that supports packet writing, say Y to
+	  include preliminary support. It should work with any MMC/Mt Fuji
+	  compliant ATAPI or SCSI drive, which is just about any newer CD
+	  writer.
+
+	  Currently only writing to CD-RW, DVD-RW and DVD+RW discs is possible.
+	  DVD-RW disks must be in restricted overwrite mode.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+	int "Free buffers for data gathering"
+	depends on CDROM_PKTCDVD
+	default "8"
+	help
+	  This controls the maximum number of active concurrent packets. More
+	  concurrent packets can increase write performance, but also require
+	  more memory. Each concurrent packet will require approximately 64Kb
+	  of non-swappable kernel memory, memory which will be allocated at
+	  pktsetup time.
+
+config CDROM_PKTCDVD_WCACHE
+	bool "Enable write caching"
+	depends on CDROM_PKTCDVD
+	help
+	  If enabled, write caching will be set for the CD-R/W device. For now
+	  this option is dangerous unless the CD-RW media is known good, as we
+	  don't do deferred write error handling yet.
+
 source "drivers/s390/block/Kconfig"
 
 endmenu
diff --git a/drivers/block/Kconfig.iosched b/drivers/block/Kconfig.iosched
index d938c5fd130b..e0ba6c93717e 100644
--- a/drivers/block/Kconfig.iosched
+++ b/drivers/block/Kconfig.iosched
@@ -1,5 +1,5 @@
 config IOSCHED_NOOP
-	bool "No-op I/O scheduler" if EMBEDDED
+	bool
 	default y
 	---help---
 	  The no-op I/O scheduler is a minimal scheduler that does basic merging
@@ -9,7 +9,7 @@ config IOSCHED_NOOP
 	  the kernel.
 
 config IOSCHED_AS
-	bool "Anticipatory I/O scheduler" if EMBEDDED
+	tristate "Anticipatory I/O scheduler"
 	default y
 	---help---
 	  The anticipatory I/O scheduler is the default disk scheduler. It is
@@ -18,7 +18,7 @@ config IOSCHED_AS
 	  slower in some cases especially some database loads.
 
 config IOSCHED_DEADLINE
-	bool "Deadline I/O scheduler" if EMBEDDED
+	tristate "Deadline I/O scheduler"
 	default y
 	---help---
 	  The deadline I/O scheduler is simple and compact, and is often as
@@ -28,7 +28,7 @@ config IOSCHED_DEADLINE
 	  anticipatory I/O scheduler and so is a good choice.
 
config IOSCHED_CFQ - bool "CFQ I/O scheduler" if EMBEDDED + tristate "CFQ I/O scheduler" default y ---help--- The CFQ I/O scheduler tries to distribute bandwidth equally diff --git a/drivers/block/Makefile b/drivers/block/Makefile index c8fbbf14ce94..1cf09a1c065b 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o +obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 0ef6a665d93e..0aa3ee8c309b 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -614,7 +614,7 @@ static void as_antic_stop(struct as_data *ad) static void as_antic_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -945,7 +945,7 @@ static void update_write_batch(struct as_data *ad) */ static void as_completed_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); WARN_ON(!list_empty(&rq->queuelist)); @@ -1030,7 +1030,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) { struct as_rq *arq = RQ_DATA(rq); const int data_dir = arq->is_sync; - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; WARN_ON(arq->state != AS_RQ_QUEUED); @@ -1361,7 +1361,7 @@ fifo_expired: static struct request *as_next_request(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct request *rq = NULL; /* @@ -1469,7 +1469,7 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq) */ static void as_requeue_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (arq) { @@ -1509,7 +1509,7 @@ static void as_account_queued_request(struct as_data *ad, struct request *rq) static void as_insert_request(request_queue_t *q, struct request *rq, int where) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (arq) { @@ -1562,7 +1562,7 @@ as_insert_request(request_queue_t *q, struct request *rq, int where) */ static int as_queue_empty(request_queue_t *q) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; if (!list_empty(&ad->fifo_list[REQ_ASYNC]) || !list_empty(&ad->fifo_list[REQ_SYNC]) @@ -1601,7 +1601,7 @@ as_latter_request(request_queue_t *q, struct request *rq) static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; int ret; @@ -1656,7 +1656,7 @@ out_insert: static void as_merged_request(request_queue_t *q, struct request *req) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); /* @@ -1701,7 
+1701,7 @@ static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); struct as_rq *anext = RQ_DATA(next); @@ -1788,7 +1788,7 @@ static void as_work_handler(void *data) static void as_put_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); if (!arq) { @@ -1807,7 +1807,7 @@ static void as_put_request(request_queue_t *q, struct request *rq) static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct as_data *ad = q->elevator.elevator_data; + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); if (arq) { @@ -1828,21 +1828,21 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) static int as_may_queue(request_queue_t *q, int rw) { - int ret = 0; - struct as_data *ad = q->elevator.elevator_data; + int ret = ELV_MQUEUE_MAY; + struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { ioc = as_get_io_context(); if (ad->io_context == ioc) - ret = 1; + ret = ELV_MQUEUE_MUST; put_io_context(ioc); } return ret; } -static void as_exit(request_queue_t *q, elevator_t *e) +static void as_exit_queue(elevator_t *e) { struct as_data *ad = e->elevator_data; @@ -1862,7 +1862,7 @@ static void as_exit(request_queue_t *q, elevator_t *e) * initialize elevator private data (as_data), and alloc a arq for * each request on the free lists */ -static int as_init(request_queue_t *q, elevator_t *e) +static int as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; int i; @@ -1962,10 +1962,10 @@ static ssize_t as_est_show(struct as_data *ad, char *page) return pos; } -#define SHOW_FUNCTION(__FUNC, __VAR) \ +#define SHOW_FUNCTION(__FUNC, __VAR) \ static ssize_t __FUNC(struct as_data *ad, char *page) \ -{ \ - return as_var_show(__VAR, (page)); \ +{ \ + return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ } SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]); SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]); @@ -1982,6 +1982,7 @@ static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \ *(__PTR) = (MIN); \ else if (*(__PTR) > (MAX)) \ *(__PTR) = (MAX); \ + *(__PTR) = msecs_to_jiffies(*(__PTR)); \ return ret; \ } STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); @@ -2070,39 +2071,64 @@ static struct kobj_type as_ktype = { .default_attrs = default_attrs, }; -static int __init as_slab_setup(void) +static struct elevator_type iosched_as = { + .ops = { + .elevator_merge_fn = as_merge, + .elevator_merged_fn = as_merged_request, + .elevator_merge_req_fn = as_merged_requests, + .elevator_next_req_fn = as_next_request, + .elevator_add_req_fn = as_insert_request, + .elevator_remove_req_fn = as_remove_request, + .elevator_requeue_req_fn = as_requeue_request, + .elevator_queue_empty_fn = as_queue_empty, + .elevator_completed_req_fn = as_completed_request, + .elevator_former_req_fn = as_former_request, + .elevator_latter_req_fn = as_latter_request, + .elevator_set_req_fn = as_set_request, + .elevator_put_req_fn = as_put_request, + .elevator_may_queue_fn = as_may_queue, + .elevator_init_fn = as_init_queue, + .elevator_exit_fn = as_exit_queue, + 
}, + + .elevator_ktype = &as_ktype, + .elevator_name = "anticipatory", + .elevator_owner = THIS_MODULE, +}; + +int as_init(void) { + int ret; + arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), 0, 0, NULL, NULL); - if (!arq_pool) - panic("as: can't init slab pool\n"); + return -ENOMEM; - return 0; + ret = elv_register(&iosched_as); + if (!ret) { + /* + * don't allow AS to get unregistered, since we would have + * to browse all tasks in the system and release their + * as_io_context first + */ + __module_get(THIS_MODULE); + return 0; + } + + kmem_cache_destroy(arq_pool); + return ret; } -subsys_initcall(as_slab_setup); - -elevator_t iosched_as = { - .elevator_merge_fn = as_merge, - .elevator_merged_fn = as_merged_request, - .elevator_merge_req_fn = as_merged_requests, - .elevator_next_req_fn = as_next_request, - .elevator_add_req_fn = as_insert_request, - .elevator_remove_req_fn = as_remove_request, - .elevator_requeue_req_fn = as_requeue_request, - .elevator_queue_empty_fn = as_queue_empty, - .elevator_completed_req_fn = as_completed_request, - .elevator_former_req_fn = as_former_request, - .elevator_latter_req_fn = as_latter_request, - .elevator_set_req_fn = as_set_request, - .elevator_put_req_fn = as_put_request, - .elevator_may_queue_fn = as_may_queue, - .elevator_init_fn = as_init, - .elevator_exit_fn = as_exit, - - .elevator_ktype = &as_ktype, - .elevator_name = "anticipatory", -}; +void as_exit(void) +{ + kmem_cache_destroy(arq_pool); + elv_unregister(&iosched_as); +} + +module_init(as_init); +module_exit(as_exit); -EXPORT_SYMBOL(iosched_as); +MODULE_AUTHOR("Nick Piggin"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("anticipatory IO scheduler"); diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index 068f4eae0b5c..cf7fc7609e67 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -22,96 +22,216 @@ #include <linux/rbtree.h> #include <linux/mempool.h> +static unsigned long max_elapsed_crq; +static unsigned long max_elapsed_dispatch; + /* * tunables */ -static int cfq_quantum = 4; -static int cfq_queued = 8; +static int cfq_quantum = 4; /* max queue in one round of service */ +static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ +static int cfq_service = HZ; /* period over which service is avg */ +static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */ +static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */ +static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */ +static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ +static int cfq_back_penalty = 2; /* penalty of a backwards seek */ +/* + * for the hash of cfqq inside the cfqd + */ #define CFQ_QHASH_SHIFT 6 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) -#define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash) +#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) -#define CFQ_MHASH_SHIFT 8 +/* + * for the hash of crq inside the cfqq + */ +#define CFQ_MHASH_SHIFT 6 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) -#define CFQ_MHASH_FN(sec) (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT)) -#define ON_MHASH(crq) !list_empty(&(crq)->hash) +#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define list_entry_hash(ptr) list_entry((ptr), struct cfq_rq, hash) +#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) 
#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) -#define RQ_DATA(rq) ((struct cfq_rq *) (rq)->elevator_private) +#define RQ_DATA(rq) (rq)->elevator_private + +/* + * rb-tree defines + */ +#define RB_NONE (2) +#define RB_EMPTY(node) ((node)->rb_node == NULL) +#define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE +#define RB_CLEAR(node) do { \ + (node)->rb_parent = NULL; \ + RB_CLEAR_COLOR((node)); \ + (node)->rb_right = NULL; \ + (node)->rb_left = NULL; \ +} while (0) +#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) +#define ON_RB(node) ((node)->rb_color != RB_NONE) +#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) +#define rq_rb_key(rq) (rq)->sector + +/* + * threshold for switching off non-tag accounting + */ +#define CFQ_MAX_TAG (4) + +/* + * sort key types and names + */ +enum { + CFQ_KEY_PGID, + CFQ_KEY_TGID, + CFQ_KEY_UID, + CFQ_KEY_GID, + CFQ_KEY_LAST, +}; + +static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL }; + +/* + * spare queue + */ +#define CFQ_KEY_SPARE (~0UL) static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; -static mempool_t *cfq_mpool; +static kmem_cache_t *cfq_ioc_pool; struct cfq_data { struct list_head rr_list; - struct list_head *dispatch; - struct list_head *cfq_hash; + struct list_head empty_list; - struct list_head *crq_hash; + struct hlist_head *cfq_hash; + struct hlist_head *crq_hash; + /* queues on rr_list (ie they have pending requests */ unsigned int busy_queues; + unsigned int max_queued; + atomic_t ref; + + int key_type; + mempool_t *crq_pool; request_queue_t *queue; + sector_t last_sector; + + int rq_in_driver; + /* - * tunables + * tunables, see top of file */ unsigned int cfq_quantum; unsigned int cfq_queued; + unsigned int cfq_fifo_expire_r; + unsigned int cfq_fifo_expire_w; + unsigned int cfq_fifo_batch_expire; + unsigned int cfq_back_penalty; + unsigned int cfq_back_max; + unsigned int find_best_crq; + + unsigned int cfq_tagged; }; struct cfq_queue { - struct list_head cfq_hash; + /* reference count */ + atomic_t ref; + /* parent cfq_data */ + struct cfq_data *cfqd; + /* hash of mergeable requests */ + struct hlist_node cfq_hash; + /* hash key */ + unsigned long key; + /* whether queue is on rr (or empty) list */ + int on_rr; + /* on either rr or empty list of cfqd */ struct list_head cfq_list; + /* sorted list of pending requests */ struct rb_root sort_list; - int pid; + /* if fifo isn't expired, next request to serve */ + struct cfq_rq *next_crq; + /* requests queued in sort_list */ int queued[2]; -#if 0 - /* - * with a simple addition like this, we can do io priorities. almost. - * does need a split request free list, too. 
- */ - int io_prio -#endif + /* currently allocated requests */ + int allocated[2]; + /* fifo list of requests in sort_list */ + struct list_head fifo[2]; + /* last time fifo expired */ + unsigned long last_fifo_expire; + + int key_type; + + unsigned long service_start; + unsigned long service_used; + + unsigned int max_rate; + + /* number of requests that have been handed to the driver */ + int in_flight; + /* number of currently allocated requests */ + int alloc_limit[2]; }; struct cfq_rq { struct rb_node rb_node; sector_t rb_key; - struct request *request; + struct hlist_node hash; struct cfq_queue *cfq_queue; + struct cfq_io_context *io_context; + + unsigned long service_start; + unsigned long queue_start; - struct list_head hash; + unsigned int in_flight : 1; + unsigned int accounted : 1; + unsigned int is_sync : 1; + unsigned int is_write : 1; }; -static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq); -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid); -static void cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *crq); +static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long); +static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); +static void cfq_update_next_crq(struct cfq_rq *); +static void cfq_put_cfqd(struct cfq_data *cfqd); /* - * lots of deadline iosched dupes, can be abstracted later... + * what the fairness is based on (ie how processes are grouped and + * differentiated) */ -static inline void __cfq_del_crq_hash(struct cfq_rq *crq) +static inline unsigned long +cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk) { - list_del_init(&crq->hash); + /* + * optimize this so that ->key_type is the offset into the struct + */ + switch (cfqd->key_type) { + case CFQ_KEY_PGID: + return process_group(tsk); + default: + case CFQ_KEY_TGID: + return tsk->tgid; + case CFQ_KEY_UID: + return tsk->uid; + case CFQ_KEY_GID: + return tsk->gid; + } } +/* + * lots of deadline iosched dupes, can be abstracted later... 
+ */ static inline void cfq_del_crq_hash(struct cfq_rq *crq) { - if (ON_MHASH(crq)) - __cfq_del_crq_hash(crq); + hlist_del_init(&crq->hash); } static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) @@ -120,32 +240,32 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) if (q->last_merge == crq->request) q->last_merge = NULL; + + cfq_update_next_crq(crq); } static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) { - struct request *rq = crq->request; + const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - BUG_ON(ON_MHASH(crq)); + BUG_ON(!hlist_unhashed(&crq->hash)); - list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]); + hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); } static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) { - struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; - struct list_head *entry, *next = hash_list->next; + struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; + struct hlist_node *entry, *next; - while ((entry = next) != hash_list) { + hlist_for_each_safe(entry, next, hash_list) { struct cfq_rq *crq = list_entry_hash(entry); struct request *__rq = crq->request; - next = entry->next; - - BUG_ON(!ON_MHASH(crq)); + BUG_ON(hlist_unhashed(&crq->hash)); if (!rq_mergeable(__rq)) { - __cfq_del_crq_hash(crq); + cfq_del_crq_hash(crq); continue; } @@ -157,29 +277,257 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) } /* - * rb tree support functions + * Lifted from AS - choose which of crq1 and crq2 that is best served now. + * We choose the request that is closest to the head right now. Distance + * behind the head are penalized and only allowed to a certain extent. */ -#define RB_NONE (2) -#define RB_EMPTY(node) ((node)->rb_node == NULL) -#define RB_CLEAR(node) ((node)->rb_color = RB_NONE) -#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) -#define ON_RB(node) ((node)->rb_color != RB_NONE) -#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) -#define rq_rb_key(rq) (rq)->sector +static struct cfq_rq * +cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) +{ + sector_t last, s1, s2, d1 = 0, d2 = 0; + int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ + unsigned long back_max; + + if (crq1 == NULL || crq1 == crq2) + return crq2; + if (crq2 == NULL) + return crq1; -static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) + s1 = crq1->request->sector; + s2 = crq2->request->sector; + + last = cfqd->last_sector; + +#if 0 + if (!list_empty(&cfqd->queue->queue_head)) { + struct list_head *entry = &cfqd->queue->queue_head; + unsigned long distance = ~0UL; + struct request *rq; + + while ((entry = entry->prev) != &cfqd->queue->queue_head) { + rq = list_entry_rq(entry); + + if (blk_barrier_rq(rq)) + break; + + if (distance < abs(s1 - rq->sector + rq->nr_sectors)) { + distance = abs(s1 - rq->sector +rq->nr_sectors); + last = rq->sector + rq->nr_sectors; + } + if (distance < abs(s2 - rq->sector + rq->nr_sectors)) { + distance = abs(s2 - rq->sector +rq->nr_sectors); + last = rq->sector + rq->nr_sectors; + } + } + } +#endif + + /* + * by definition, 1KiB is 2 sectors + */ + back_max = cfqd->cfq_back_max * 2; + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. 
+ */ + if (s1 >= last) + d1 = s1 - last; + else if (s1 + back_max >= last) + d1 = (last - s1) * cfqd->cfq_back_penalty; + else + r1_wrap = 1; + + if (s2 >= last) + d2 = s2 - last; + else if (s2 + back_max >= last) + d2 = (last - s2) * cfqd->cfq_back_penalty; + else + r2_wrap = 1; + + /* Found required data */ + if (!r1_wrap && r2_wrap) + return crq1; + else if (!r2_wrap && r1_wrap) + return crq2; + else if (r1_wrap && r2_wrap) { + /* both behind the head */ + if (s1 <= s2) + return crq1; + else + return crq2; + } + + /* Both requests in front of the head */ + if (d1 < d2) + return crq1; + else if (d2 < d1) + return crq2; + else { + if (s1 >= s2) + return crq1; + else + return crq2; + } +} + +/* + * would be nice to take fifo expire time into account as well + */ +static struct cfq_rq * +cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct cfq_rq *last) +{ + struct cfq_rq *crq_next = NULL, *crq_prev = NULL; + struct rb_node *rbnext, *rbprev; + + if (!ON_RB(&last->rb_node)) + return NULL; + + if ((rbnext = rb_next(&last->rb_node)) == NULL) + rbnext = rb_first(&cfqq->sort_list); + + rbprev = rb_prev(&last->rb_node); + + if (rbprev) + crq_prev = rb_entry_crq(rbprev); + if (rbnext) + crq_next = rb_entry_crq(rbnext); + + return cfq_choose_req(cfqd, crq_next, crq_prev); +} + +static void cfq_update_next_crq(struct cfq_rq *crq) { + struct cfq_queue *cfqq = crq->cfq_queue; + + if (cfqq->next_crq == crq) + cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); +} + +static int cfq_check_sort_rr_list(struct cfq_queue *cfqq) +{ + struct list_head *head = &cfqq->cfqd->rr_list; + struct list_head *next, *prev; + + /* + * list might still be ordered + */ + next = cfqq->cfq_list.next; + if (next != head) { + struct cfq_queue *cnext = list_entry_cfqq(next); + + if (cfqq->service_used > cnext->service_used) + return 1; + } + + prev = cfqq->cfq_list.prev; + if (prev != head) { + struct cfq_queue *cprev = list_entry_cfqq(prev); + + if (cfqq->service_used < cprev->service_used) + return 1; + } + + return 0; +} + +static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) +{ + struct list_head *entry = &cfqq->cfqd->rr_list; + + if (!cfqq->on_rr) + return; + if (!new_queue && !cfq_check_sort_rr_list(cfqq)) + return; + + list_del(&cfqq->cfq_list); + + /* + * sort by our mean service_used, sub-sort by in-flight requests + */ + while ((entry = entry->prev) != &cfqq->cfqd->rr_list) { + struct cfq_queue *__cfqq = list_entry_cfqq(entry); + + if (cfqq->service_used > __cfqq->service_used) + break; + else if (cfqq->service_used == __cfqq->service_used) { + struct list_head *prv; + + while ((prv = entry->prev) != &cfqq->cfqd->rr_list) { + __cfqq = list_entry_cfqq(prv); + + WARN_ON(__cfqq->service_used > cfqq->service_used); + if (cfqq->service_used != __cfqq->service_used) + break; + if (cfqq->in_flight > __cfqq->in_flight) + break; + + entry = prv; + } + } + } + + list_add(&cfqq->cfq_list, entry); +} + +/* + * add to busy list of queues for service, trying to be fair in ordering + * the pending list according to requests serviced + */ +static inline void +cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + /* + * it's currently on the empty list + */ + cfqq->on_rr = 1; + cfqd->busy_queues++; + + if (time_after(jiffies, cfqq->service_start + cfq_service)) + cfqq->service_used >>= 3; + + cfq_sort_rr_list(cfqq, 1); +} + +static inline void +cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + list_move(&cfqq->cfq_list, &cfqd->empty_list); + cfqq->on_rr = 0; + 
+ BUG_ON(!cfqd->busy_queues); + cfqd->busy_queues--; +} + +/* + * rb tree support functions + */ +static inline void cfq_del_crq_rb(struct cfq_rq *crq) +{ + struct cfq_queue *cfqq = crq->cfq_queue; + if (ON_RB(&crq->rb_node)) { - cfqq->queued[rq_data_dir(crq->request)]--; + struct cfq_data *cfqd = cfqq->cfqd; + + BUG_ON(!cfqq->queued[crq->is_sync]); + + cfq_update_next_crq(crq); + + cfqq->queued[crq->is_sync]--; rb_erase(&crq->rb_node, &cfqq->sort_list); - crq->cfq_queue = NULL; + RB_CLEAR_COLOR(&crq->rb_node); + + if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) + cfq_del_cfqq_rr(cfqd, cfqq); } } static struct cfq_rq * -__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) +__cfq_add_crq_rb(struct cfq_rq *crq) { - struct rb_node **p = &cfqq->sort_list.rb_node; + struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; struct rb_node *parent = NULL; struct cfq_rq *__crq; @@ -199,30 +547,50 @@ __cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) return NULL; } -static void -cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq) +static void cfq_add_crq_rb(struct cfq_rq *crq) { + struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_data *cfqd = cfqq->cfqd; struct request *rq = crq->request; struct cfq_rq *__alias; crq->rb_key = rq_rb_key(rq); - cfqq->queued[rq_data_dir(rq)]++; -retry: - __alias = __cfq_add_crq_rb(cfqq, crq); - if (!__alias) { - rb_insert_color(&crq->rb_node, &cfqq->sort_list); - crq->cfq_queue = cfqq; - return; + cfqq->queued[crq->is_sync]++; + + /* + * looks a little odd, but the first insert might return an alias. + * if that happens, put the alias on the dispatch list + */ + while ((__alias = __cfq_add_crq_rb(crq)) != NULL) + cfq_dispatch_sort(cfqd->queue, __alias); + + rb_insert_color(&crq->rb_node, &cfqq->sort_list); + + if (!cfqq->on_rr) + cfq_add_cfqq_rr(cfqd, cfqq); + + /* + * check if this request is a better next-serve candidate + */ + cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); +} + +static inline void +cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) +{ + if (ON_RB(&crq->rb_node)) { + rb_erase(&crq->rb_node, &cfqq->sort_list); + cfqq->queued[crq->is_sync]--; } - cfq_dispatch_sort(cfqd, cfqq, __alias); - goto retry; + cfq_add_crq_rb(crq); } static struct request * cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) { - struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid); + const unsigned long key = cfq_hash_key(cfqd, current); + struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key); struct rb_node *n; if (!cfqq) @@ -244,30 +612,44 @@ out: return NULL; } -static void cfq_remove_request(request_queue_t *q, struct request *rq) +/* + * make sure the service time gets corrected on reissue of this request + */ +static void cfq_requeue_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator.elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { struct cfq_queue *cfqq = crq->cfq_queue; + if (cfqq->cfqd->cfq_tagged) { + cfqq->service_used--; + cfq_sort_rr_list(cfqq, 0); + } + + crq->accounted = 0; + cfqq->cfqd->rq_in_driver--; + } + list_add(&rq->queuelist, &q->queue_head); +} + +static void cfq_remove_request(request_queue_t *q, struct request *rq) +{ + struct cfq_rq *crq = RQ_DATA(rq); + + if (crq) { cfq_remove_merge_hints(q, crq); list_del_init(&rq->queuelist); - if (cfqq) { - cfq_del_crq_rb(cfqq, crq); - - if (RB_EMPTY(&cfqq->sort_list)) - cfq_put_queue(cfqd, cfqq); - } + if (crq->cfq_queue) + cfq_del_crq_rb(crq); } } static int 
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -305,7 +687,7 @@ out_insert: static void cfq_merged_request(request_queue_t *q, struct request *req) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(req); cfq_del_crq_hash(crq); @@ -314,193 +696,546 @@ static void cfq_merged_request(request_queue_t *q, struct request *req) if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) { struct cfq_queue *cfqq = crq->cfq_queue; - cfq_del_crq_rb(cfqq, crq); - cfq_add_crq_rb(cfqd, cfqq, crq); + cfq_update_next_crq(crq); + cfq_reposition_crq_rb(cfqq, crq); } q->last_merge = req; } static void -cfq_merged_requests(request_queue_t *q, struct request *req, +cfq_merged_requests(request_queue_t *q, struct request *rq, struct request *next) { - cfq_merged_request(q, req); + struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_rq *cnext = RQ_DATA(next); + + cfq_merged_request(q, rq); + + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(cnext->queue_start, crq->queue_start)) { + list_move(&rq->queuelist, &next->queuelist); + crq->queue_start = cnext->queue_start; + } + } + + cfq_update_next_crq(cnext); cfq_remove_request(q, next); } -static void -cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *crq) +/* + * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues, + * this function sector sorts the selected request to minimize seeks. we start + * at cfqd->last_sector, not 0. + */ +static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) { - struct list_head *head = cfqd->dispatch, *entry = head; + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_queue *cfqq = crq->cfq_queue; + struct list_head *head = &q->queue_head, *entry = head; struct request *__rq; + sector_t last; - cfq_del_crq_rb(cfqq, crq); - cfq_remove_merge_hints(cfqd->queue, crq); + cfq_del_crq_rb(crq); + cfq_remove_merge_hints(q, crq); + list_del(&crq->request->queuelist); - if (!list_empty(head)) { - __rq = list_entry_rq(head->next); + last = cfqd->last_sector; + while ((entry = entry->prev) != head) { + __rq = list_entry_rq(entry); - if (crq->request->sector < __rq->sector) { - entry = head->prev; - goto link; + if (blk_barrier_rq(crq->request)) + break; + if (!blk_fs_request(crq->request)) + break; + + if (crq->request->sector > __rq->sector) + break; + if (__rq->sector > last && crq->request->sector < last) { + last = crq->request->sector; + break; } } - while ((entry = entry->prev) != head) { - __rq = list_entry_rq(entry); + cfqd->last_sector = last; + crq->in_flight = 1; + cfqq->in_flight++; + list_add(&crq->request->queuelist, entry); +} - if (crq->request->sector <= __rq->sector) - break; +/* + * return expired entry, or NULL to just start from scratch in rbtree + */ +static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) +{ + struct cfq_data *cfqd = cfqq->cfqd; + const int reads = !list_empty(&cfqq->fifo[0]); + const int writes = !list_empty(&cfqq->fifo[1]); + unsigned long now = jiffies; + struct cfq_rq *crq; + + if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire)) + return NULL; + + crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist)); + if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) { + 
cfqq->last_fifo_expire = now; + return crq; + } + + crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist)); + if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) { + cfqq->last_fifo_expire = now; + return crq; } -link: - list_add_tail(&crq->request->queuelist, entry); + return NULL; } +/* + * dispatch a single request from given queue + */ static inline void -__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd, - struct cfq_queue *cfqq) +cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd, + struct cfq_queue *cfqq) { - struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list)); + struct cfq_rq *crq; + + /* + * follow expired path, else get first next available + */ + if ((crq = cfq_check_fifo(cfqq)) == NULL) { + if (cfqd->find_best_crq) + crq = cfqq->next_crq; + else + crq = rb_entry_crq(rb_first(&cfqq->sort_list)); + } + + cfqd->last_sector = crq->request->sector + crq->request->nr_sectors; - cfq_dispatch_sort(cfqd, cfqq, crq); + /* + * finally, insert request into driver list + */ + cfq_dispatch_sort(q, crq); } -static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd) +static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch) { + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; struct list_head *entry, *tmp; - int ret, queued, good_queues; + int queued, busy_queues, first_round; if (list_empty(&cfqd->rr_list)) return 0; - queued = ret = 0; + queued = 0; + first_round = 1; restart: - good_queues = 0; + busy_queues = 0; list_for_each_safe(entry, tmp, &cfqd->rr_list) { - cfqq = list_entry_cfqq(cfqd->rr_list.next); + cfqq = list_entry_cfqq(entry); BUG_ON(RB_EMPTY(&cfqq->sort_list)); - __cfq_dispatch_requests(q, cfqd, cfqq); + /* + * first round of queueing, only select from queues that + * don't already have io in-flight + */ + if (first_round && cfqq->in_flight) + continue; - if (RB_EMPTY(&cfqq->sort_list)) - cfq_put_queue(cfqd, cfqq); - else - good_queues++; + cfq_dispatch_request(q, cfqd, cfqq); + + if (!RB_EMPTY(&cfqq->sort_list)) + busy_queues++; queued++; - ret = 1; } - if ((queued < cfqd->cfq_quantum) && good_queues) + if ((queued < max_dispatch) && (busy_queues || first_round)) { + first_round = 0; goto restart; + } - return ret; + return queued; +} + +static inline void cfq_account_dispatch(struct cfq_rq *crq) +{ + struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_data *cfqd = cfqq->cfqd; + unsigned long now, elapsed; + + /* + * accounted bit is necessary since some drivers will call + * elv_next_request() many times for the same request (eg ide) + */ + if (crq->accounted) + return; + + now = jiffies; + if (cfqq->service_start == ~0UL) + cfqq->service_start = now; + + /* + * on drives with tagged command queueing, command turn-around time + * doesn't necessarily reflect the time spent processing this very + * command inside the drive. 
so do the accounting differently there, + * by just sorting on the number of requests + */ + if (cfqd->cfq_tagged) { + if (time_after(now, cfqq->service_start + cfq_service)) { + cfqq->service_start = now; + cfqq->service_used /= 10; + } + + cfqq->service_used++; + cfq_sort_rr_list(cfqq, 0); + } + + elapsed = now - crq->queue_start; + if (elapsed > max_elapsed_dispatch) + max_elapsed_dispatch = elapsed; + + crq->accounted = 1; + crq->service_start = now; + + if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) { + cfqq->cfqd->cfq_tagged = 1; + printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG); + } +} + +static inline void +cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) +{ + struct cfq_data *cfqd = cfqq->cfqd; + + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; + + if (!cfqd->cfq_tagged) { + unsigned long now = jiffies; + unsigned long duration = now - crq->service_start; + + if (time_after(now, cfqq->service_start + cfq_service)) { + cfqq->service_start = now; + cfqq->service_used >>= 3; + } + + cfqq->service_used += duration; + cfq_sort_rr_list(cfqq, 0); + + if (duration > max_elapsed_crq) + max_elapsed_crq = duration; + } } static struct request *cfq_next_request(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct request *rq; - if (!list_empty(cfqd->dispatch)) { + if (!list_empty(&q->queue_head)) { struct cfq_rq *crq; dispatch: - rq = list_entry_rq(cfqd->dispatch->next); + rq = list_entry_rq(q->queue_head.next); - crq = RQ_DATA(rq); - if (crq) + if ((crq = RQ_DATA(rq)) != NULL) { cfq_remove_merge_hints(q, crq); + cfq_account_dispatch(crq); + } return rq; } - if (cfq_dispatch_requests(q, cfqd)) + if (cfq_dispatch_requests(q, cfqd->cfq_quantum)) goto dispatch; return NULL; } +/* + * task holds one reference to the queue, dropped when task exits. each crq + * in-flight on this queue also holds a reference, dropped when crq is freed. + * + * queue lock must be held here. 
+ */ +static void cfq_put_queue(struct cfq_queue *cfqq) +{ + BUG_ON(!atomic_read(&cfqq->ref)); + + if (!atomic_dec_and_test(&cfqq->ref)) + return; + + BUG_ON(rb_first(&cfqq->sort_list)); + BUG_ON(cfqq->on_rr); + + cfq_put_cfqd(cfqq->cfqd); + + /* + * it's on the empty list and still hashed + */ + list_del(&cfqq->cfq_list); + hlist_del(&cfqq->cfq_hash); + kmem_cache_free(cfq_pool, cfqq); +} + static inline struct cfq_queue * -__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval) +__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval) { - struct list_head *hash_list = &cfqd->cfq_hash[hashval]; - struct list_head *entry; + struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; + struct hlist_node *entry, *next; - list_for_each(entry, hash_list) { + hlist_for_each_safe(entry, next, hash_list) { struct cfq_queue *__cfqq = list_entry_qhash(entry); - if (__cfqq->pid == pid) + if (__cfqq->key == key) return __cfqq; } return NULL; } -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid) +static struct cfq_queue * +cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key) { - const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT); + return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT)); +} + +static inline void +cfq_rehash_cfqq(struct cfq_data *cfqd, struct cfq_queue **cfqq, + struct cfq_io_context *cic) +{ + unsigned long hashkey = cfq_hash_key(cfqd, current); + unsigned long hashval = hash_long(hashkey, CFQ_QHASH_SHIFT); + struct cfq_queue *__cfqq; + unsigned long flags; + + spin_lock_irqsave(cfqd->queue->queue_lock, flags); - return __cfq_find_cfq_hash(cfqd, pid, hashval); + hlist_del(&(*cfqq)->cfq_hash); + + __cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval); + if (!__cfqq || __cfqq == *cfqq) { + __cfqq = *cfqq; + hlist_add_head(&__cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); + __cfqq->key_type = cfqd->key_type; + } else { + atomic_inc(&__cfqq->ref); + cic->cfqq = __cfqq; + cfq_put_queue(*cfqq); + *cfqq = __cfqq; + } + + cic->cfqq = __cfqq; + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } -static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) +static void cfq_free_io_context(struct cfq_io_context *cic) { - cfqd->busy_queues--; - list_del(&cfqq->cfq_list); - list_del(&cfqq->cfq_hash); - mempool_free(cfqq, cfq_mpool); + kmem_cache_free(cfq_ioc_pool, cic); } -static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int pid, - int gfp_mask) +/* + * locking hierarchy is: io_context lock -> queue locks + */ +static void cfq_exit_io_context(struct cfq_io_context *cic) { - const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT); + struct cfq_queue *cfqq = cic->cfqq; + struct list_head *entry = &cic->list; + request_queue_t *q; + unsigned long flags; + + /* + * put the reference this task is holding to the various queues + */ + spin_lock_irqsave(&cic->ioc->lock, flags); + while ((entry = cic->list.next) != &cic->list) { + struct cfq_io_context *__cic; + + __cic = list_entry(entry, struct cfq_io_context, list); + list_del(entry); + + q = __cic->cfqq->cfqd->queue; + spin_lock(q->queue_lock); + cfq_put_queue(__cic->cfqq); + spin_unlock(q->queue_lock); + } + + q = cfqq->cfqd->queue; + spin_lock(q->queue_lock); + cfq_put_queue(cfqq); + spin_unlock(q->queue_lock); + + cic->cfqq = NULL; + spin_unlock_irqrestore(&cic->ioc->lock, flags); +} + +static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags) +{ + struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, 
gfp_flags); + + if (cic) { + cic->dtor = cfq_free_io_context; + cic->exit = cfq_exit_io_context; + INIT_LIST_HEAD(&cic->list); + cic->cfqq = NULL; + } + + return cic; +} + +/* + * Setup general io context and cfq io context. There can be several cfq + * io contexts per general io context, if this process is doing io to more + * than one device managed by cfq. Note that caller is holding a reference to + * cfqq, so we don't need to worry about it disappearing + */ +static struct cfq_io_context * +cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags) +{ + struct cfq_data *cfqd = (*cfqq)->cfqd; + struct cfq_queue *__cfqq = *cfqq; + struct cfq_io_context *cic; + struct io_context *ioc; + + might_sleep_if(gfp_flags & __GFP_WAIT); + + ioc = get_io_context(gfp_flags); + if (!ioc) + return NULL; + + if ((cic = ioc->cic) == NULL) { + cic = cfq_alloc_io_context(gfp_flags); + + if (cic == NULL) + goto err; + + ioc->cic = cic; + cic->ioc = ioc; + cic->cfqq = __cfqq; + atomic_inc(&__cfqq->ref); + } else { + struct cfq_io_context *__cic; + unsigned long flags; + + /* + * since the first cic on the list is actually the head + * itself, need to check this here or we'll duplicate an + * cic per ioc for no reason + */ + if (cic->cfqq == __cfqq) + goto out; + + /* + * cic exists, check if we already are there. linear search + * should be ok here, the list will usually not be more than + * 1 or a few entries long + */ + spin_lock_irqsave(&ioc->lock, flags); + list_for_each_entry(__cic, &cic->list, list) { + /* + * this process is already holding a reference to + * this queue, so no need to get one more + */ + if (__cic->cfqq == __cfqq) { + cic = __cic; + spin_unlock_irqrestore(&ioc->lock, flags); + goto out; + } + } + spin_unlock_irqrestore(&ioc->lock, flags); + + /* + * nope, process doesn't have a cic assoicated with this + * cfqq yet. 
get a new one and add to list + */ + __cic = cfq_alloc_io_context(gfp_flags); + if (__cic == NULL) + goto err; + + __cic->ioc = ioc; + __cic->cfqq = __cfqq; + atomic_inc(&__cfqq->ref); + spin_lock_irqsave(&ioc->lock, flags); + list_add(&__cic->list, &cic->list); + spin_unlock_irqrestore(&ioc->lock, flags); + + cic = __cic; + *cfqq = __cfqq; + } + +out: + /* + * if key_type has been changed on the fly, we lazily rehash + * each queue at lookup time + */ + if ((*cfqq)->key_type != cfqd->key_type) + cfq_rehash_cfqq(cfqd, cfqq, cic); + + return cic; +err: + put_io_context(ioc); + return NULL; +} + +static struct cfq_queue * +__cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask) +{ + const int hashval = hash_long(key, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; - request_queue_t *q = cfqd->queue; retry: - cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval); + cfqq = __cfq_find_cfq_hash(cfqd, key, hashval); if (!cfqq) { if (new_cfqq) { cfqq = new_cfqq; new_cfqq = NULL; } else if (gfp_mask & __GFP_WAIT) { - spin_unlock_irq(q->queue_lock); - new_cfqq = mempool_alloc(cfq_mpool, gfp_mask); - spin_lock_irq(q->queue_lock); + spin_unlock_irq(cfqd->queue->queue_lock); + new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + spin_lock_irq(cfqd->queue->queue_lock); goto retry; } else - return NULL; + goto out; + + memset(cfqq, 0, sizeof(*cfqq)); - INIT_LIST_HEAD(&cfqq->cfq_hash); + INIT_HLIST_NODE(&cfqq->cfq_hash); INIT_LIST_HEAD(&cfqq->cfq_list); RB_CLEAR_ROOT(&cfqq->sort_list); - - cfqq->pid = pid; - cfqq->queued[0] = cfqq->queued[1] = 0; - list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); + INIT_LIST_HEAD(&cfqq->fifo[0]); + INIT_LIST_HEAD(&cfqq->fifo[1]); + + cfqq->key = key; + hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); + atomic_set(&cfqq->ref, 0); + cfqq->cfqd = cfqd; + atomic_inc(&cfqd->ref); + cfqq->key_type = cfqd->key_type; + cfqq->service_start = ~0UL; } if (new_cfqq) - mempool_free(new_cfqq, cfq_mpool); + kmem_cache_free(cfq_pool, new_cfqq); + atomic_inc(&cfqq->ref); +out: + WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); return cfqq; } -static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid, - int gfp_mask) +static struct cfq_queue * +cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask) { request_queue_t *q = cfqd->queue; struct cfq_queue *cfqq; spin_lock_irq(q->queue_lock); - cfqq = __cfq_get_queue(cfqd, pid, gfp_mask); + cfqq = __cfq_get_queue(cfqd, key, gfp_mask); spin_unlock_irq(q->queue_lock); return cfqq; @@ -508,40 +1243,30 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid, static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq) { - struct cfq_queue *cfqq; + crq->is_sync = 0; + if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE) + crq->is_sync = 1; - cfqq = __cfq_get_queue(cfqd, current->tgid, GFP_ATOMIC); - if (cfqq) { - cfq_add_crq_rb(cfqd, cfqq, crq); + cfq_add_crq_rb(crq); + crq->queue_start = jiffies; - if (list_empty(&cfqq->cfq_list)) { - list_add(&cfqq->cfq_list, &cfqd->rr_list); - cfqd->busy_queues++; - } - } else { - /* - * should can only happen if the request wasn't allocated - * through blk_alloc_request(), eg stack requests from ide-cd - * (those should be removed) _and_ we are in OOM. 
- */ - list_add_tail(&crq->request->queuelist, cfqd->dispatch); - } + list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]); } static void cfq_insert_request(request_queue_t *q, struct request *rq, int where) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); switch (where) { case ELEVATOR_INSERT_BACK: - while (cfq_dispatch_requests(q, cfqd)) + while (cfq_dispatch_requests(q, cfqd->cfq_quantum)) ; - list_add_tail(&rq->queuelist, cfqd->dispatch); + list_add_tail(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_FRONT: - list_add(&rq->queuelist, cfqd->dispatch); + list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); @@ -562,12 +1287,27 @@ cfq_insert_request(request_queue_t *q, struct request *rq, int where) static int cfq_queue_empty(request_queue_t *q) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; - if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list)) - return 1; + return list_empty(&q->queue_head) && list_empty(&cfqd->rr_list); +} + +static void cfq_completed_request(request_queue_t *q, struct request *rq) +{ + struct cfq_rq *crq = RQ_DATA(rq); + + if (unlikely(!blk_fs_request(rq))) + return; + + if (crq->in_flight) { + struct cfq_queue *cfqq = crq->cfq_queue; + + WARN_ON(!cfqq->in_flight); + cfqq->in_flight--; + + cfq_account_completion(cfqq, crq); + } - return 0; } static struct request * @@ -596,92 +1336,169 @@ cfq_latter_request(request_queue_t *q, struct request *rq) static int cfq_may_queue(request_queue_t *q, int rw) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; - int ret = 1; + int ret = ELV_MQUEUE_MAY; - if (!cfqd->busy_queues) - goto out; + if (current->flags & PF_MEMALLOC) + return ELV_MQUEUE_MAY; - cfqq = cfq_find_cfq_hash(cfqd, current->tgid); + cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current)); if (cfqq) { - int limit = (q->nr_requests - cfqd->cfq_queued) / cfqd->busy_queues; + int limit = cfqd->max_queued; + + if (cfqq->allocated[rw] < cfqd->cfq_queued) + return ELV_MQUEUE_MUST; - if (limit < 3) - limit = 3; + if (cfqd->busy_queues) + limit = q->nr_requests / cfqd->busy_queues; + + if (limit < cfqd->cfq_queued) + limit = cfqd->cfq_queued; else if (limit > cfqd->max_queued) limit = cfqd->max_queued; - if (cfqq->queued[rw] > limit) - ret = 0; + if (cfqq->allocated[rw] >= limit) { + if (limit > cfqq->alloc_limit[rw]) + cfqq->alloc_limit[rw] = limit; + + ret = ELV_MQUEUE_NO; + } } -out: + return ret; } +static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) +{ + struct request_list *rl = &q->rq; + const int write = waitqueue_active(&rl->wait[WRITE]); + const int read = waitqueue_active(&rl->wait[READ]); + + if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ]) + wake_up(&rl->wait[READ]); + if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE]) + wake_up(&rl->wait[WRITE]); +} + +/* + * queue lock held here + */ static void cfq_put_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); - struct request_list *rl; - int other_rw; if (crq) { + struct cfq_queue *cfqq = crq->cfq_queue; + BUG_ON(q->last_merge == rq); - BUG_ON(ON_MHASH(crq)); + 
BUG_ON(!hlist_unhashed(&crq->hash)); + + if (crq->io_context) + put_io_context(crq->io_context->ioc); + + if (!cfqq->allocated[crq->is_write]) { + WARN_ON(1); + cfqq->allocated[crq->is_write] = 1; + } + cfqq->allocated[crq->is_write]--; mempool_free(crq, cfqd->crq_pool); rq->elevator_private = NULL; - } - /* - * work-around for may_queue "bug": if a read gets issued and refused - * to queue because writes ate all the allowed slots and no other - * reads are pending for this queue, it could get stuck infinitely - * since freed_request() only checks the waitqueue for writes when - * freeing them. or vice versa for a single write vs many reads. - * so check here whether "the other" data direction might be able - * to queue and wake them - */ - rl = &q->rq; - other_rw = rq_data_dir(rq) ^ 1; - if (rl->count[other_rw] <= q->nr_requests) { smp_mb(); - if (waitqueue_active(&rl->wait[other_rw])) - wake_up(&rl->wait[other_rw]); + cfq_check_waiters(q, cfqq); + cfq_put_queue(cfqq); } } +/* + * Allocate cfq data structures associated with this request. A queue and + */ static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct cfq_data *cfqd = q->elevator.elevator_data; + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + const int rw = rq_data_dir(rq); struct cfq_queue *cfqq; struct cfq_rq *crq; + unsigned long flags; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + spin_lock_irqsave(q->queue_lock, flags); + + cfqq = __cfq_get_queue(cfqd, cfq_hash_key(cfqd, current), gfp_mask); + if (!cfqq) { +#if 0 + cfqq = cfq_get_queue(cfqd, CFQ_KEY_SPARE, gfp_mask); + printk("%s: got spare queue\n", current->comm); +#else + goto out_lock; +#endif + } + + if (cfqq->allocated[rw] >= cfqd->max_queued) + goto out_lock; + + spin_unlock_irqrestore(q->queue_lock, flags); /* - * prepare a queue up front, so cfq_enqueue() doesn't have to + * if hashing type has changed, the cfq_queue might change here. we + * don't bother rechecking ->allocated since it should be a rare + * event */ - cfqq = cfq_get_queue(cfqd, current->tgid, gfp_mask); - if (!cfqq) - return 1; + cic = cfq_get_io_context(&cfqq, gfp_mask); + if (!cic) + goto err; crq = mempool_alloc(cfqd->crq_pool, gfp_mask); if (crq) { - memset(crq, 0, sizeof(*crq)); RB_CLEAR(&crq->rb_node); + crq->rb_key = 0; crq->request = rq; - crq->cfq_queue = NULL; - INIT_LIST_HEAD(&crq->hash); + INIT_HLIST_NODE(&crq->hash); + crq->cfq_queue = cfqq; + crq->io_context = cic; + crq->service_start = crq->queue_start = 0; + crq->in_flight = crq->accounted = crq->is_sync = 0; + crq->is_write = rw; rq->elevator_private = crq; + cfqq->allocated[rw]++; + cfqq->alloc_limit[rw] = 0; return 0; } + put_io_context(cic->ioc); +err: + spin_lock_irqsave(q->queue_lock, flags); + cfq_put_queue(cfqq); +out_lock: + spin_unlock_irqrestore(q->queue_lock, flags); return 1; } -static void cfq_exit(request_queue_t *q, elevator_t *e) +static void cfq_put_cfqd(struct cfq_data *cfqd) { - struct cfq_data *cfqd = e->elevator_data; + request_queue_t *q = cfqd->queue; + elevator_t *e = q->elevator; + struct cfq_queue *cfqq; + + if (!atomic_dec_and_test(&cfqd->ref)) + return; + + /* + * kill spare queue, getting it means we have two refences to it. 
+ * drop both + */ + spin_lock_irq(q->queue_lock); + cfqq = __cfq_get_queue(cfqd, CFQ_KEY_SPARE, GFP_ATOMIC); + cfq_put_queue(cfqq); + cfq_put_queue(cfqq); + spin_unlock_irq(q->queue_lock); + + blk_put_queue(q); e->elevator_data = NULL; mempool_destroy(cfqd->crq_pool); @@ -690,9 +1507,15 @@ static void cfq_exit(request_queue_t *q, elevator_t *e) kfree(cfqd); } -static int cfq_init(request_queue_t *q, elevator_t *e) +static void cfq_exit_queue(elevator_t *e) +{ + cfq_put_cfqd(e->elevator_data); +} + +static int cfq_init_queue(request_queue_t *q, elevator_t *e) { struct cfq_data *cfqd; + struct cfq_queue *cfqq; int i; cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); @@ -701,12 +1524,13 @@ static int cfq_init(request_queue_t *q, elevator_t *e) memset(cfqd, 0, sizeof(*cfqd)); INIT_LIST_HEAD(&cfqd->rr_list); + INIT_LIST_HEAD(&cfqd->empty_list); - cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); + cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); if (!cfqd->crq_hash) goto out_crqhash; - cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); + cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); if (!cfqd->cfq_hash) goto out_cfqhash; @@ -715,25 +1539,44 @@ static int cfq_init(request_queue_t *q, elevator_t *e) goto out_crqpool; for (i = 0; i < CFQ_MHASH_ENTRIES; i++) - INIT_LIST_HEAD(&cfqd->crq_hash[i]); + INIT_HLIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) - INIT_LIST_HEAD(&cfqd->cfq_hash[i]); + INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); - cfqd->dispatch = &q->queue_head; e->elevator_data = cfqd; + cfqd->queue = q; + atomic_inc(&q->refcnt); + + /* + * setup spare failure queue + */ + cfqq = cfq_get_queue(cfqd, CFQ_KEY_SPARE, GFP_KERNEL); + if (!cfqq) + goto out_spare; /* * just set it to some high value, we want anyone to be able to queue * some requests. 
fairness is handled differently */ - cfqd->max_queued = q->nr_requests; - q->nr_requests = 8192; + q->nr_requests = 1024; + cfqd->max_queued = q->nr_requests / 16; + q->nr_batching = cfq_queued; + cfqd->key_type = CFQ_KEY_TGID; + cfqd->find_best_crq = 1; + atomic_set(&cfqd->ref, 1); cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; + cfqd->cfq_fifo_expire_r = cfq_fifo_expire_r; + cfqd->cfq_fifo_expire_w = cfq_fifo_expire_w; + cfqd->cfq_fifo_batch_expire = cfq_fifo_rate; + cfqd->cfq_back_max = cfq_back_max; + cfqd->cfq_back_penalty = cfq_back_penalty; return 0; +out_spare: + mempool_destroy(cfqd->crq_pool); out_crqpool: kfree(cfqd->cfq_hash); out_cfqhash: @@ -743,29 +1586,39 @@ out_crqhash: return -ENOMEM; } +static void cfq_slab_kill(void) +{ + if (crq_pool) + kmem_cache_destroy(crq_pool); + if (cfq_pool) + kmem_cache_destroy(cfq_pool); + if (cfq_ioc_pool) + kmem_cache_destroy(cfq_ioc_pool); +} + static int __init cfq_slab_setup(void) { crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, NULL, NULL); - if (!crq_pool) - panic("cfq_iosched: can't init crq pool\n"); + goto fail; cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, NULL, NULL); - if (!cfq_pool) - panic("cfq_iosched: can't init cfq pool\n"); + goto fail; - cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool); - - if (!cfq_mpool) - panic("cfq_iosched: can't init cfq mpool\n"); + cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool", + sizeof(struct cfq_io_context), 0, 0, NULL, NULL); + if (!cfq_ioc_pool) + goto fail; return 0; +fail: + cfq_slab_kill(); + return -ENOMEM; } -subsys_initcall(cfq_slab_setup); /* * sysfs parts below --> @@ -791,27 +1644,135 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) return count; } -#define SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t +cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count) +{ + max_elapsed_dispatch = max_elapsed_crq = 0; + return count; +} + +static ssize_t +cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count) +{ + spin_lock_irq(cfqd->queue->queue_lock); + if (!strncmp(page, "pgid", 4)) + cfqd->key_type = CFQ_KEY_PGID; + else if (!strncmp(page, "tgid", 4)) + cfqd->key_type = CFQ_KEY_TGID; + else if (!strncmp(page, "uid", 3)) + cfqd->key_type = CFQ_KEY_UID; + else if (!strncmp(page, "gid", 3)) + cfqd->key_type = CFQ_KEY_GID; + spin_unlock_irq(cfqd->queue->queue_lock); + return count; +} + +static ssize_t +cfq_read_key_type(struct cfq_data *cfqd, char *page) +{ + ssize_t len = 0; + int i; + + for (i = CFQ_KEY_PGID; i < CFQ_KEY_LAST; i++) { + if (cfqd->key_type == i) + len += sprintf(page+len, "[%s] ", cfq_key_types[i]); + else + len += sprintf(page+len, "%s ", cfq_key_types[i]); + } + len += sprintf(page+len, "\n"); + return len; +} + +static ssize_t +cfq_status_show(struct cfq_data *cfqd, char *page) +{ + struct list_head *entry; + struct cfq_queue *cfqq; + ssize_t len; + int i = 0, queues; + + len = sprintf(page, "Busy queues: %u\n", cfqd->busy_queues); + len += sprintf(page+len, "key type: %s\n", + cfq_key_types[cfqd->key_type]); + len += sprintf(page+len, "last sector: %Lu\n", + (unsigned long long)cfqd->last_sector); + len += sprintf(page+len, "max time in iosched: %lu\n", + max_elapsed_dispatch); + len += sprintf(page+len, "max completion time: %lu\n", max_elapsed_crq); + + len += sprintf(page+len, "Busy queue list:\n"); + spin_lock_irq(cfqd->queue->queue_lock); + list_for_each(entry, &cfqd->rr_list) { + i++; + cfqq = list_entry_cfqq(entry); + 
len += sprintf(page+len, " cfqq: key=%lu alloc=%d/%d, " + "queued=%d/%d, last_fifo=%lu, service_used=%lu\n", + cfqq->key, cfqq->allocated[0], cfqq->allocated[1], + cfqq->queued[0], cfqq->queued[1], + cfqq->last_fifo_expire, cfqq->service_used); + } + len += sprintf(page+len, " busy queues total: %d\n", i); + queues = i; + + len += sprintf(page+len, "Empty queue list:\n"); + i = 0; + list_for_each(entry, &cfqd->empty_list) { + i++; + cfqq = list_entry_cfqq(entry); + len += sprintf(page+len, " cfqq: key=%lu alloc=%d/%d, " + "queued=%d/%d, last_fifo=%lu, service_used=%lu\n", + cfqq->key, cfqq->allocated[0], cfqq->allocated[1], + cfqq->queued[0], cfqq->queued[1], + cfqq->last_fifo_expire, cfqq->service_used); + } + len += sprintf(page+len, " empty queues total: %d\n", i); + queues += i; + len += sprintf(page+len, "Total queues: %d\n", queues); + spin_unlock_irq(cfqd->queue->queue_lock); + return len; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ { \ - return cfq_var_show(__VAR, (page)); \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return cfq_var_show(__data, (page)); \ } -SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum); -SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued); +SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); +SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); +SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r, 1); +SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w, 1); +SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire, 1); +SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0); +SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); +SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); #undef SHOW_FUNCTION -#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ { \ - int ret = cfq_var_store(__PTR, (page), count); \ - if (*(__PTR) < (MIN)) \ - *(__PTR) = (MIN); \ - else if (*(__PTR) > (MAX)) \ - *(__PTR) = (MAX); \ + unsigned int __data; \ + int ret = cfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ return ret; \ } -STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, INT_MAX); -STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX); +STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX, 1); +STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX, 1); +STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX, 1); +STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0); +STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); +STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); #undef STORE_FUNCTION static struct cfq_fs_entry cfq_quantum_entry = { @@ -824,10 +1785,62 @@ static struct cfq_fs_entry cfq_queued_entry = { .show = cfq_queued_show, .store = cfq_queued_store, }; +static struct cfq_fs_entry cfq_fifo_expire_r_entry = { + .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, + .show = 
cfq_fifo_expire_r_show, + .store = cfq_fifo_expire_r_store, +}; +static struct cfq_fs_entry cfq_fifo_expire_w_entry = { + .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_fifo_expire_w_show, + .store = cfq_fifo_expire_w_store, +}; +static struct cfq_fs_entry cfq_fifo_batch_expire_entry = { + .attr = {.name = "fifo_batch_expire", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_fifo_batch_expire_show, + .store = cfq_fifo_batch_expire_store, +}; +static struct cfq_fs_entry cfq_find_best_entry = { + .attr = {.name = "find_best_crq", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_find_best_show, + .store = cfq_find_best_store, +}; +static struct cfq_fs_entry cfq_back_max_entry = { + .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_back_max_show, + .store = cfq_back_max_store, +}; +static struct cfq_fs_entry cfq_back_penalty_entry = { + .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_back_penalty_show, + .store = cfq_back_penalty_store, +}; +static struct cfq_fs_entry cfq_clear_elapsed_entry = { + .attr = {.name = "clear_elapsed", .mode = S_IWUSR }, + .store = cfq_clear_elapsed, +}; +static struct cfq_fs_entry cfq_misc_entry = { + .attr = {.name = "show_status", .mode = S_IRUGO }, + .show = cfq_status_show, +}; +static struct cfq_fs_entry cfq_key_type_entry = { + .attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_read_key_type, + .store = cfq_set_key_type, +}; static struct attribute *default_attrs[] = { &cfq_quantum_entry.attr, &cfq_queued_entry.attr, + &cfq_fifo_expire_r_entry.attr, + &cfq_fifo_expire_w_entry.attr, + &cfq_fifo_batch_expire_entry.attr, + &cfq_key_type_entry.attr, + &cfq_find_best_entry.attr, + &cfq_back_max_entry.attr, + &cfq_back_penalty_entry.attr, + &cfq_clear_elapsed_entry.attr, + &cfq_misc_entry.attr, NULL, }; @@ -868,23 +1881,56 @@ struct kobj_type cfq_ktype = { .default_attrs = default_attrs, }; -elevator_t iosched_cfq = { - .elevator_name = "cfq", - .elevator_ktype = &cfq_ktype, - .elevator_merge_fn = cfq_merge, - .elevator_merged_fn = cfq_merged_request, - .elevator_merge_req_fn = cfq_merged_requests, - .elevator_next_req_fn = cfq_next_request, - .elevator_add_req_fn = cfq_insert_request, - .elevator_remove_req_fn = cfq_remove_request, - .elevator_queue_empty_fn = cfq_queue_empty, - .elevator_former_req_fn = cfq_former_request, - .elevator_latter_req_fn = cfq_latter_request, - .elevator_set_req_fn = cfq_set_request, - .elevator_put_req_fn = cfq_put_request, - .elevator_may_queue_fn = cfq_may_queue, - .elevator_init_fn = cfq_init, - .elevator_exit_fn = cfq_exit, +static struct elevator_type iosched_cfq = { + .ops = { + .elevator_merge_fn = cfq_merge, + .elevator_merged_fn = cfq_merged_request, + .elevator_merge_req_fn = cfq_merged_requests, + .elevator_next_req_fn = cfq_next_request, + .elevator_add_req_fn = cfq_insert_request, + .elevator_remove_req_fn = cfq_remove_request, + .elevator_requeue_req_fn = cfq_requeue_request, + .elevator_queue_empty_fn = cfq_queue_empty, + .elevator_completed_req_fn = cfq_completed_request, + .elevator_former_req_fn = cfq_former_request, + .elevator_latter_req_fn = cfq_latter_request, + .elevator_set_req_fn = cfq_set_request, + .elevator_put_req_fn = cfq_put_request, + .elevator_may_queue_fn = cfq_may_queue, + .elevator_init_fn = cfq_init_queue, + .elevator_exit_fn = cfq_exit_queue, + }, + .elevator_ktype = &cfq_ktype, + .elevator_name = "cfq", + .elevator_owner = THIS_MODULE, }; -EXPORT_SYMBOL(iosched_cfq); +int cfq_init(void) +{ + 
int ret; + + if (cfq_slab_setup()) + return -ENOMEM; + + ret = elv_register(&iosched_cfq); + if (!ret) { + __module_get(THIS_MODULE); + return 0; + } + + cfq_slab_kill(); + return ret; +} + +void cfq_exit(void) +{ + cfq_slab_kill(); + elv_unregister(&iosched_cfq); +} + +module_init(cfq_init); +module_exit(cfq_exit); + +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler"); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 204b3182900d..dc896a12283b 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -21,7 +21,6 @@ */ #include <linux/config.h> /* CONFIG_PROC_FS */ #include <linux/module.h> -#include <linux/version.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/bio.h> @@ -732,7 +731,6 @@ static void __iomem *remap_pci_mem(ulong base, ulong size) } #ifndef MODULE -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,13) /* * Config string is a comma separated set of i/o addresses of EISA cards. */ @@ -749,18 +747,6 @@ static int cpqarray_setup(char *str) __setup("smart2=", cpqarray_setup); -#else - -/* - * Copy the contents of the ints[] array passed to us by init. - */ -void cpqarray_setup(char *str, int *ints) -{ - int i; - for(i=0; i<ints[0] && i<8; i++) - eisa[i] = ints[i+1]; -} -#endif #endif /* diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index fb7ab733c709..f482e8bdb4d6 100644 --- a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -289,7 +289,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir) static inline void deadline_add_request(struct request_queue *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); const int data_dir = rq_data_dir(drq->request); @@ -317,7 +317,7 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq) struct deadline_rq *drq = RQ_DATA(rq); if (drq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; list_del_init(&drq->fifo); deadline_remove_merge_hints(q, drq); @@ -328,7 +328,7 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq) static int deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; int ret; @@ -383,7 +383,7 @@ out_insert: static void deadline_merged_request(request_queue_t *q, struct request *req) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); /* @@ -407,7 +407,7 @@ static void deadline_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(req); struct deadline_rq *dnext = RQ_DATA(next); @@ -604,7 +604,7 @@ dispatch_request: static struct request *deadline_next_request(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct request *rq; /* @@ -625,7 +625,7 @@ dispatch: static void deadline_insert_request(request_queue_t *q, struct request *rq, int where) { - struct deadline_data *dd = q->elevator.elevator_data; + 
struct deadline_data *dd = q->elevator->elevator_data; /* barriers must flush the reorder queue */ if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) @@ -653,7 +653,7 @@ deadline_insert_request(request_queue_t *q, struct request *rq, int where) static int deadline_queue_empty(request_queue_t *q) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; if (!list_empty(&dd->fifo_list[WRITE]) || !list_empty(&dd->fifo_list[READ]) @@ -687,7 +687,7 @@ deadline_latter_request(request_queue_t *q, struct request *rq) return NULL; } -static void deadline_exit(request_queue_t *q, elevator_t *e) +static void deadline_exit_queue(elevator_t *e) { struct deadline_data *dd = e->elevator_data; @@ -703,7 +703,7 @@ static void deadline_exit(request_queue_t *q, elevator_t *e) * initialize elevator private data (deadline_data), and alloc a drq for * each request on the free lists */ -static int deadline_init(request_queue_t *q, elevator_t *e) +static int deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; int i; @@ -748,7 +748,7 @@ static int deadline_init(request_queue_t *q, elevator_t *e) static void deadline_put_request(request_queue_t *q, struct request *rq) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); if (drq) { @@ -760,7 +760,7 @@ static void deadline_put_request(request_queue_t *q, struct request *rq) static int deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - struct deadline_data *dd = q->elevator.elevator_data; + struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; drq = mempool_alloc(dd->drq_pool, gfp_mask); @@ -805,33 +805,41 @@ deadline_var_store(unsigned int *var, const char *page, size_t count) return count; } -#define SHOW_FUNCTION(__FUNC, __VAR) \ +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ static ssize_t __FUNC(struct deadline_data *dd, char *page) \ { \ - return deadline_var_show(__VAR, (page)); \ -} -SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ]); -SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE]); -SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved); -SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges); -SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch); + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return deadline_var_show(__data, (page)); \ +} +SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); +SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); +SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); +SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); +SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); #undef SHOW_FUNCTION -#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \ { \ - int ret = deadline_var_store(__PTR, (page), count); \ - if (*(__PTR) < (MIN)) \ - *(__PTR) = (MIN); \ - else if (*(__PTR) > (MAX)) \ - *(__PTR) = (MAX); \ + unsigned int __data; \ + int ret = deadline_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ return ret; \ } 
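
Both cfq and deadline now stamp out their sysfs accessors from a single macro pair, with a __CONV flag deciding whether the stored value is converted between jiffies and milliseconds on the way in and out. A compilable userspace analog of that pattern is below; the HZ_SIM rate, struct and tunable names are made up for illustration and do not come from the patch.

#include <stdio.h>
#include <stdlib.h>

#define HZ_SIM 250	/* illustrative tick rate standing in for HZ */

static unsigned int ticks_to_ms(unsigned int t)  { return t * (1000 / HZ_SIM); }
static unsigned int ms_to_ticks(unsigned int ms) { return ms / (1000 / HZ_SIM); }

struct tunables {
	unsigned int quantum;		/* plain count, no conversion */
	unsigned int fifo_expire;	/* stored in ticks, shown in ms */
};

#define SHOW_FUNCTION(__FUNC, __FIELD, __CONV)				\
static int __FUNC(struct tunables *t, char *page)			\
{									\
	unsigned int __data = t->__FIELD;				\
	if (__CONV)							\
		__data = ticks_to_ms(__data);				\
	return sprintf(page, "%u\n", __data);				\
}
SHOW_FUNCTION(quantum_show, quantum, 0)
SHOW_FUNCTION(fifo_expire_show, fifo_expire, 1)
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __FIELD, MIN, MAX, __CONV)		\
static void __FUNC(struct tunables *t, const char *page)		\
{									\
	unsigned int __data = (unsigned int)strtoul(page, NULL, 10);	\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	t->__FIELD = __CONV ? ms_to_ticks(__data) : __data;		\
}
STORE_FUNCTION(quantum_store, quantum, 1, 1024, 0)
STORE_FUNCTION(fifo_expire_store, fifo_expire, 1, 60000, 1)
#undef STORE_FUNCTION

int main(void)
{
	struct tunables t = { .quantum = 4, .fifo_expire = 0 };
	char page[16];

	fifo_expire_store(&t, "500");	/* user writes milliseconds... */
	fifo_expire_show(&t, page);	/* ...and reads milliseconds back */
	printf("fifo_expire: %s", page);
	quantum_store(&t, "8");
	quantum_show(&t, page);
	printf("quantum: %s", page);
	return 0;
}
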
-STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX); -STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX); -STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX); -STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1); -STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX); +STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); +STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); +STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); #undef STORE_FUNCTION static struct deadline_fs_entry deadline_readexpire_entry = { @@ -906,36 +914,54 @@ struct kobj_type deadline_ktype = { .default_attrs = default_attrs, }; -static int __init deadline_slab_setup(void) +static struct elevator_type iosched_deadline = { + .ops = { + .elevator_merge_fn = deadline_merge, + .elevator_merged_fn = deadline_merged_request, + .elevator_merge_req_fn = deadline_merged_requests, + .elevator_next_req_fn = deadline_next_request, + .elevator_add_req_fn = deadline_insert_request, + .elevator_remove_req_fn = deadline_remove_request, + .elevator_queue_empty_fn = deadline_queue_empty, + .elevator_former_req_fn = deadline_former_request, + .elevator_latter_req_fn = deadline_latter_request, + .elevator_set_req_fn = deadline_set_request, + .elevator_put_req_fn = deadline_put_request, + .elevator_init_fn = deadline_init_queue, + .elevator_exit_fn = deadline_exit_queue, + }, + + .elevator_ktype = &deadline_ktype, + .elevator_name = "deadline", + .elevator_owner = THIS_MODULE, +}; + +int deadline_init(void) { + int ret; + drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), 0, 0, NULL, NULL); if (!drq_pool) - panic("deadline: can't init slab pool\n"); + return -ENOMEM; - return 0; + ret = elv_register(&iosched_deadline); + if (ret) + kmem_cache_destroy(drq_pool); + + return ret; } -subsys_initcall(deadline_slab_setup); - -elevator_t iosched_deadline = { - .elevator_merge_fn = deadline_merge, - .elevator_merged_fn = deadline_merged_request, - .elevator_merge_req_fn = deadline_merged_requests, - .elevator_next_req_fn = deadline_next_request, - .elevator_add_req_fn = deadline_insert_request, - .elevator_remove_req_fn = deadline_remove_request, - .elevator_queue_empty_fn = deadline_queue_empty, - .elevator_former_req_fn = deadline_former_request, - .elevator_latter_req_fn = deadline_latter_request, - .elevator_set_req_fn = deadline_set_request, - .elevator_put_req_fn = deadline_put_request, - .elevator_init_fn = deadline_init, - .elevator_exit_fn = deadline_exit, - - .elevator_ktype = &deadline_ktype, - .elevator_name = "deadline", -}; +void deadline_exit(void) +{ + kmem_cache_destroy(drq_pool); + elv_unregister(&iosched_deadline); +} + +module_init(deadline_init); +module_exit(deadline_exit); -EXPORT_SYMBOL(iosched_deadline); +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("deadline IO scheduler"); diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 35c9385ac133..1b4f6a70c0ca 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -37,6 +37,9 @@ #include <asm/uaccess.h> +static spinlock_t elv_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(elv_list); + /* * can we safely merge with this request? 
*/ @@ -60,6 +63,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) return 0; } +EXPORT_SYMBOL(elv_rq_merge_ok); inline int elv_try_merge(struct request *__rq, struct bio *bio) { @@ -77,6 +81,7 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) return ret; } +EXPORT_SYMBOL(elv_try_merge); inline int elv_try_last_merge(request_queue_t *q, struct bio *bio) { @@ -85,31 +90,117 @@ inline int elv_try_last_merge(request_queue_t *q, struct bio *bio) return ELEVATOR_NO_MERGE; } +EXPORT_SYMBOL(elv_try_last_merge); -/* - * general block -> elevator interface starts here - */ -int elevator_init(request_queue_t *q, elevator_t *type) +struct elevator_type *elevator_find(const char *name) +{ + struct elevator_type *e = NULL; + struct list_head *entry; + + spin_lock_irq(&elv_list_lock); + list_for_each(entry, &elv_list) { + struct elevator_type *__e; + + __e = list_entry(entry, struct elevator_type, list); + + if (!strcmp(__e->elevator_name, name)) { + e = __e; + break; + } + } + spin_unlock_irq(&elv_list_lock); + + return e; +} + +static int elevator_attach(request_queue_t *q, struct elevator_type *e, + struct elevator_queue *eq) { - elevator_t *e = &q->elevator; + int ret = 0; - memcpy(e, type, sizeof(*e)); + if (!try_module_get(e->elevator_owner)) + return -EINVAL; + + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; + q->elevator = eq; + + if (eq->ops->elevator_init_fn) + ret = eq->ops->elevator_init_fn(q, eq); - if (e->elevator_init_fn) - return e->elevator_init_fn(q, e); + return ret; +} + +static char chosen_elevator[16]; + +static void elevator_setup_default(void) +{ + /* + * check if default is set and exists + */ + if (chosen_elevator[0] && elevator_find(chosen_elevator)) + return; + +#if defined(CONFIG_IOSCHED_AS) + strcpy(chosen_elevator, "anticipatory"); +#elif defined(CONFIG_IOSCHED_DEADLINE) + strcpy(chosen_elevator, "deadline"); +#elif defined(CONFIG_IOSCHED_CFQ) + strcpy(chosen_elevator, "cfq"); +#elif defined(CONFIG_IOSCHED_NOOP) + strcpy(chosen_elevator, "noop"); +#else +#error "You must build at least 1 IO scheduler into the kernel" +#endif + printk("elevator: using %s as default io scheduler\n", chosen_elevator); +} +static int __init elevator_setup(char *str) +{ + strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); return 0; } -void elevator_exit(request_queue_t *q) +__setup("elevator=", elevator_setup); + +int elevator_init(request_queue_t *q, char *name) +{ + struct elevator_type *e = NULL; + struct elevator_queue *eq; + int ret = 0; + + elevator_setup_default(); + + if (!name) + name = chosen_elevator; + + e = elevator_find(name); + if (!e) + return -EINVAL; + + eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); + if (!eq) + return -ENOMEM; + + ret = elevator_attach(q, e, eq); + if (ret) + kfree(eq); + + return ret; +} + +void elevator_exit(elevator_t *e) { - elevator_t *e = &q->elevator; + if (e->ops->elevator_exit_fn) + e->ops->elevator_exit_fn(e); - if (e->elevator_exit_fn) - e->elevator_exit_fn(q, e); + module_put(e->elevator_type->elevator_owner); + e->elevator_type = NULL; + kfree(e); } int elevator_global_init(void) @@ -119,32 +210,32 @@ int elevator_global_init(void) int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_merge_fn) - return e->elevator_merge_fn(q, req, bio); + if (e->ops->elevator_merge_fn) + return e->ops->elevator_merge_fn(q, req, 
bio); return ELEVATOR_NO_MERGE; } void elv_merged_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_merged_fn) - e->elevator_merged_fn(q, rq); + if (e->ops->elevator_merged_fn) + e->ops->elevator_merged_fn(q, rq); } void elv_merge_requests(request_queue_t *q, struct request *rq, struct request *next) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; if (q->last_merge == next) q->last_merge = NULL; - if (e->elevator_merge_req_fn) - e->elevator_merge_req_fn(q, rq, next); + if (e->ops->elevator_merge_req_fn) + e->ops->elevator_merge_req_fn(q, rq, next); } void elv_requeue_request(request_queue_t *q, struct request *rq) @@ -160,8 +251,8 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) * if iosched has an explicit requeue hook, then use that. otherwise * just put the request at the front of the queue */ - if (q->elevator.elevator_requeue_req_fn) - q->elevator.elevator_requeue_req_fn(q, rq); + if (q->elevator->ops->elevator_requeue_req_fn) + q->elevator->ops->elevator_requeue_req_fn(q, rq); else __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } @@ -180,7 +271,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, blk_plug_device(q); rq->q = q; - q->elevator.elevator_add_req_fn(q, rq, where); + q->elevator->ops->elevator_add_req_fn(q, rq, where); if (blk_queue_plugged(q)) { int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight; @@ -203,7 +294,7 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, static inline struct request *__elv_next_request(request_queue_t *q) { - return q->elevator.elevator_next_req_fn(q); + return q->elevator->ops->elevator_next_req_fn(q); } struct request *elv_next_request(request_queue_t *q) @@ -252,7 +343,7 @@ struct request *elv_next_request(request_queue_t *q) void elv_remove_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; /* * the time frame between a request being removed from the lists @@ -274,16 +365,16 @@ void elv_remove_request(request_queue_t *q, struct request *rq) if (rq == q->last_merge) q->last_merge = NULL; - if (e->elevator_remove_req_fn) - e->elevator_remove_req_fn(q, rq); + if (e->ops->elevator_remove_req_fn) + e->ops->elevator_remove_req_fn(q, rq); } int elv_queue_empty(request_queue_t *q) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_queue_empty_fn) - return e->elevator_queue_empty_fn(q); + if (e->ops->elevator_queue_empty_fn) + return e->ops->elevator_queue_empty_fn(q); return list_empty(&q->queue_head); } @@ -292,10 +383,10 @@ struct request *elv_latter_request(request_queue_t *q, struct request *rq) { struct list_head *next; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_latter_req_fn) - return e->elevator_latter_req_fn(q, rq); + if (e->ops->elevator_latter_req_fn) + return e->ops->elevator_latter_req_fn(q, rq); next = rq->queuelist.next; if (next != &q->queue_head && next != &rq->queuelist) @@ -308,10 +399,10 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) { struct list_head *prev; - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_former_req_fn) - return e->elevator_former_req_fn(q, rq); + if (e->ops->elevator_former_req_fn) + return e->ops->elevator_former_req_fn(q, rq); prev = rq->queuelist.prev; if (prev != &q->queue_head && prev != &rq->queuelist) @@ -322,10 +413,10 @@ struct 
request *elv_former_request(request_queue_t *q, struct request *rq) int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_set_req_fn) - return e->elevator_set_req_fn(q, rq, gfp_mask); + if (e->ops->elevator_set_req_fn) + return e->ops->elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private = NULL; return 0; @@ -333,25 +424,25 @@ int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask) void elv_put_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_put_req_fn) - e->elevator_put_req_fn(q, rq); + if (e->ops->elevator_put_req_fn) + e->ops->elevator_put_req_fn(q, rq); } int elv_may_queue(request_queue_t *q, int rw) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; - if (e->elevator_may_queue_fn) - return e->elevator_may_queue_fn(q, rw); + if (e->ops->elevator_may_queue_fn) + return e->ops->elevator_may_queue_fn(q, rw); - return 0; + return ELV_MQUEUE_MAY; } void elv_completed_request(request_queue_t *q, struct request *rq) { - elevator_t *e = &q->elevator; + elevator_t *e = q->elevator; /* * request is released from the driver, io must be done @@ -359,22 +450,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq) if (blk_account_rq(rq)) q->in_flight--; - if (e->elevator_completed_req_fn) - e->elevator_completed_req_fn(q, rq); + if (e->ops->elevator_completed_req_fn) + e->ops->elevator_completed_req_fn(q, rq); } int elv_register_queue(struct request_queue *q) { - elevator_t *e; - - e = &q->elevator; + elevator_t *e = q->elevator; e->kobj.parent = kobject_get(&q->kobj); if (!e->kobj.parent) return -EBUSY; snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - e->kobj.ktype = e->elevator_ktype; + e->kobj.ktype = e->elevator_type->elevator_ktype; return kobject_register(&e->kobj); } @@ -382,12 +471,131 @@ int elv_register_queue(struct request_queue *q) void elv_unregister_queue(struct request_queue *q) { if (q) { - elevator_t * e = &q->elevator; + elevator_t *e = q->elevator; kobject_unregister(&e->kobj); kobject_put(&q->kobj); } } +int elv_register(struct elevator_type *e) +{ + if (elevator_find(e->elevator_name)) + BUG(); + + spin_lock_irq(&elv_list_lock); + list_add_tail(&e->list, &elv_list); + spin_unlock_irq(&elv_list_lock); + + printk("io scheduler %s registered\n", e->elevator_name); + return 0; +} +EXPORT_SYMBOL_GPL(elv_register); + +void elv_unregister(struct elevator_type *e) +{ + spin_lock_irq(&elv_list_lock); + list_del_init(&e->list); + spin_unlock_irq(&elv_list_lock); +} +EXPORT_SYMBOL_GPL(elv_unregister); + +/* + * switch to new_e io scheduler. be careful not to introduce deadlocks - + * we don't free the old io scheduler, before we have allocated what we + * need for the new one. 
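
Every elv_* wrapper in this hunk now goes through q->elevator->ops and calls a hook only when the scheduler provides it, falling back to a default otherwise (elv_may_queue(), for instance, now returns ELV_MQUEUE_MAY rather than 0). A simplified, self-contained sketch of that ops-table dispatch; the types, hook signatures and the "noop" instance are stand-ins, not the kernel's.

#include <stdio.h>

enum { MQUEUE_NO, MQUEUE_MAY, MQUEUE_MUST };

struct sched_ops {
	int (*may_queue_fn)(int rw);
	void (*add_req_fn)(int rq);
};

struct sched_type {
	const char *name;
	struct sched_ops ops;
};

/* generic wrapper: call the hook only if the scheduler provides one */
static int sched_may_queue(struct sched_type *e, int rw)
{
	if (e->ops.may_queue_fn)
		return e->ops.may_queue_fn(rw);
	return MQUEUE_MAY;	/* default, mirroring elv_may_queue() */
}

static void noop_add_req(int rq)
{
	printf("noop: queued request %d\n", rq);
}

static struct sched_type noop_sched = {
	.name = "noop",
	.ops = {
		/* no may_queue hook: the wrapper falls back to MQUEUE_MAY */
		.add_req_fn = noop_add_req,
	},
};

int main(void)
{
	printf("%s may_queue -> %d\n", noop_sched.name,
	       sched_may_queue(&noop_sched, 0));
	noop_sched.ops.add_req_fn(1);
	return 0;
}
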
this way we have a chance of going back to the old + * one, if the new one fails init for some reason + */ +static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) +{ + elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); + elevator_t *old_elevator; + + if (!e) { + printk("elevator: out of memory\n"); + return; + } + + blk_wait_queue_drained(q); + + /* + * unregister old elevator data + */ + elv_unregister_queue(q); + old_elevator = q->elevator; + + /* + * attach and start new elevator + */ + if (elevator_attach(q, new_e, e)) + goto fail; + + if (elv_register_queue(q)) + goto fail_register; + + /* + * finally exit old elevator and start queue again + */ + elevator_exit(old_elevator); + blk_finish_queue_drain(q); + return; + +fail_register: + /* + * switch failed, exit the new io scheduler and reattach the old + * one again (along with re-adding the sysfs dir) + */ + elevator_exit(e); +fail: + q->elevator = old_elevator; + elv_register_queue(q); + blk_finish_queue_drain(q); + printk("elevator: switch to %s failed\n", new_e->elevator_name); +} + +ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) +{ + char elevator_name[ELV_NAME_MAX]; + struct elevator_type *e; + + memset(elevator_name, 0, sizeof(elevator_name)); + strncpy(elevator_name, name, sizeof(elevator_name)); + + if (elevator_name[strlen(elevator_name) - 1] == '\n') + elevator_name[strlen(elevator_name) - 1] = '\0'; + + e = elevator_find(elevator_name); + if (!e) { + printk("elevator: type %s not found\n", elevator_name); + return -EINVAL; + } + + elevator_switch(q, e); + return count; +} + +ssize_t elv_iosched_show(request_queue_t *q, char *name) +{ + elevator_t *e = q->elevator; + struct elevator_type *elv = e->elevator_type; + struct list_head *entry; + int len = 0; + + spin_lock_irq(q->queue_lock); + list_for_each(entry, &elv_list) { + struct elevator_type *__e; + + __e = list_entry(entry, struct elevator_type, list); + if (!strcmp(elv->elevator_name, __e->elevator_name)) + len += sprintf(name+len, "[%s] ", elv->elevator_name); + else + len += sprintf(name+len, "%s ", __e->elevator_name); + } + spin_unlock_irq(q->queue_lock); + + len += sprintf(len+name, "\n"); + return len; +} + module_init(elevator_global_init); EXPORT_SYMBOL(elv_add_request); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 26fdf6be6bd0..3ba6430899df 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -243,6 +243,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) blk_queue_hardsect_size(q, 512); blk_queue_dma_alignment(q, 511); blk_queue_congestion_threshold(q); + q->nr_batching = BLK_BATCH_REQ; q->unplug_thresh = 4; /* hmm */ q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ @@ -1395,7 +1396,8 @@ void blk_cleanup_queue(request_queue_t * q) if (!atomic_dec_and_test(&q->refcnt)) return; - elevator_exit(q); + if (q->elevator) + elevator_exit(q->elevator); del_timer_sync(&q->unplug_timer); kblockd_flush(); @@ -1418,6 +1420,7 @@ static int blk_init_free_list(request_queue_t *q) rl->count[READ] = rl->count[WRITE] = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); + init_waitqueue_head(&rl->drain); rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); @@ -1429,45 +1432,6 @@ static int blk_init_free_list(request_queue_t *q) static int __make_request(request_queue_t *, struct bio *); -static elevator_t *chosen_elevator = -#if defined(CONFIG_IOSCHED_AS) - 
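
elevator_switch() above allocates and attaches the new scheduler before the old one is torn down, so a failed switch can fall back to the previous elevator and re-register its sysfs directory. A stripped-down sketch of that allocate-new-before-freeing-old ordering; attach() here is a stand-in that can be made to fail, and queue draining and sysfs registration are omitted.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sched {
	char name[16];
};

static struct sched *active;	/* stands in for q->elevator */

/* pretend attach can fail for one particular scheduler name */
static int attach(struct sched *e, const char *name)
{
	if (!strcmp(name, "broken"))
		return -1;
	snprintf(e->name, sizeof(e->name), "%s", name);
	active = e;
	return 0;
}

/* switch schedulers: only discard the old one once the new one works */
static void elevator_switch(const char *name)
{
	struct sched *old = active;
	struct sched *e = malloc(sizeof(*e));

	if (!e) {
		printf("elevator: out of memory\n");
		return;
	}
	if (attach(e, name)) {
		free(e);
		active = old;	/* fall back to the previous scheduler */
		printf("elevator: switch to %s failed\n", name);
		return;
	}
	free(old);
	printf("elevator: now using %s\n", active->name);
}

int main(void)
{
	active = malloc(sizeof(*active));
	snprintf(active->name, sizeof(active->name), "anticipatory");

	elevator_switch("deadline");	/* succeeds */
	elevator_switch("broken");	/* fails, deadline stays active */
	printf("active: %s\n", active->name);
	free(active);
	return 0;
}
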
&iosched_as; -#elif defined(CONFIG_IOSCHED_DEADLINE) - &iosched_deadline; -#elif defined(CONFIG_IOSCHED_CFQ) - &iosched_cfq; -#elif defined(CONFIG_IOSCHED_NOOP) - &elevator_noop; -#else - NULL; -#error "You must have at least 1 I/O scheduler selected" -#endif - -#if defined(CONFIG_IOSCHED_AS) || defined(CONFIG_IOSCHED_DEADLINE) || defined (CONFIG_IOSCHED_NOOP) -static int __init elevator_setup(char *str) -{ -#ifdef CONFIG_IOSCHED_DEADLINE - if (!strcmp(str, "deadline")) - chosen_elevator = &iosched_deadline; -#endif -#ifdef CONFIG_IOSCHED_AS - if (!strcmp(str, "as")) - chosen_elevator = &iosched_as; -#endif -#ifdef CONFIG_IOSCHED_CFQ - if (!strcmp(str, "cfq")) - chosen_elevator = &iosched_cfq; -#endif -#ifdef CONFIG_IOSCHED_NOOP - if (!strcmp(str, "noop")) - chosen_elevator = &elevator_noop; -#endif - return 1; -} - -__setup("elevator=", elevator_setup); -#endif /* CONFIG_IOSCHED_AS || CONFIG_IOSCHED_DEADLINE || CONFIG_IOSCHED_NOOP */ - request_queue_t *blk_alloc_queue(int gfp_mask) { request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask); @@ -1520,21 +1484,14 @@ EXPORT_SYMBOL(blk_alloc_queue); **/ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { - request_queue_t *q; - static int printed; + request_queue_t *q = blk_alloc_queue(GFP_KERNEL); - q = blk_alloc_queue(GFP_KERNEL); if (!q) return NULL; if (blk_init_free_list(q)) goto out_init; - if (!printed) { - printed = 1; - printk("Using %s io scheduler\n", chosen_elevator->elevator_name); - } - q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; @@ -1555,8 +1512,10 @@ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) /* * all done */ - if (!elevator_init(q, chosen_elevator)) + if (!elevator_init(q, NULL)) { + blk_queue_congestion_threshold(q); return q; + } blk_cleanup_queue(q); out_init: @@ -1584,13 +1543,20 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq) mempool_free(rq, q->rq.rq_pool); } -static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask) +static inline struct request *blk_alloc_request(request_queue_t *q, int rw, + int gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); if (!rq) return NULL; + /* + * first three bits are identical in rq->flags and bio->bi_rw, + * see bio.h and blkdev.h + */ + rq->flags = rw; + if (!elv_set_request(q, rq, gfp_mask)) return rq; @@ -1602,7 +1568,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask) * ioc_batching returns true if the ioc is a valid batching request and * should be given priority access to a request. */ -static inline int ioc_batching(struct io_context *ioc) +static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) { if (!ioc) return 0; @@ -1612,7 +1578,7 @@ static inline int ioc_batching(struct io_context *ioc) * even if the batch times out, otherwise we could theoretically * lose wakeups. */ - return ioc->nr_batch_requests == BLK_BATCH_REQ || + return ioc->nr_batch_requests == q->nr_batching || (ioc->nr_batch_requests > 0 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); } @@ -1623,12 +1589,12 @@ static inline int ioc_batching(struct io_context *ioc) * is the behaviour we want though - once it gets a wakeup it should be given * a nice run. 
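
ioc_batching() above exempts a task from the queue-full limit while it either still holds its full per-queue batch (q->nr_batching) or is inside the BLK_BATCH_TIME window since it last waited. A rough userspace sketch of that check, using wall-clock seconds where the kernel uses jiffies; the constants are illustrative.

#include <stdio.h>
#include <time.h>

#define BATCH_REQ	32	/* stands in for q->nr_batching */
#define BATCH_TIME	2	/* seconds, stands in for BLK_BATCH_TIME */

struct io_context {
	int nr_batch_requests;
	time_t last_waited;
};

/* may this context keep allocating even though the queue is "full"? */
static int ioc_batching(const struct io_context *ioc)
{
	if (!ioc)
		return 0;
	return ioc->nr_batch_requests == BATCH_REQ ||
	       (ioc->nr_batch_requests > 0 &&
		time(NULL) < ioc->last_waited + BATCH_TIME);
}

static void ioc_set_batching(struct io_context *ioc)
{
	if (!ioc || ioc_batching(ioc))
		return;
	ioc->nr_batch_requests = BATCH_REQ;
	ioc->last_waited = time(NULL);
}

int main(void)
{
	struct io_context ioc = { 0, 0 };

	printf("before: batching=%d\n", ioc_batching(&ioc));
	ioc_set_batching(&ioc);		/* task just slept on a full queue */
	ioc.nr_batch_requests--;	/* it allocated one request */
	printf("after:  batching=%d\n", ioc_batching(&ioc));
	return 0;
}
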
*/ -void ioc_set_batching(struct io_context *ioc) +void ioc_set_batching(request_queue_t *q, struct io_context *ioc) { - if (!ioc || ioc_batching(ioc)) + if (!ioc || ioc_batching(q, ioc)) return; - ioc->nr_batch_requests = BLK_BATCH_REQ; + ioc->nr_batch_requests = q->nr_batching; ioc->last_waited = jiffies; } @@ -1644,11 +1610,14 @@ static void freed_request(request_queue_t *q, int rw) if (rl->count[rw] < queue_congestion_off_threshold(q)) clear_queue_congested(q, rw); if (rl->count[rw]+1 <= q->nr_requests) { + smp_mb(); if (waitqueue_active(&rl->wait[rw])) wake_up(&rl->wait[rw]); - if (!waitqueue_active(&rl->wait[rw])) - blk_clear_queue_full(q, rw); + blk_clear_queue_full(q, rw); } + if (unlikely(waitqueue_active(&rl->drain)) && + !rl->count[READ] && !rl->count[WRITE]) + wake_up(&rl->drain); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) @@ -1661,6 +1630,9 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) struct request_list *rl = &q->rq; struct io_context *ioc = get_io_context(gfp_mask); + if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) + return NULL; + spin_lock_irq(q->queue_lock); if (rl->count[rw]+1 >= q->nr_requests) { /* @@ -1670,13 +1642,22 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) * will be blocked. */ if (!blk_queue_full(q, rw)) { - ioc_set_batching(ioc); + ioc_set_batching(q, ioc); blk_set_queue_full(q, rw); } } - if (blk_queue_full(q, rw) - && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { + switch (elv_may_queue(q, rw)) { + case ELV_MQUEUE_NO: + spin_unlock_irq(q->queue_lock); + goto out; + case ELV_MQUEUE_MAY: + break; + case ELV_MQUEUE_MUST: + goto get_rq; + } + + if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { /* * The queue is full and the allocating process is not a * "batcher", and not exempted by the IO scheduler @@ -1685,12 +1666,13 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) goto out; } +get_rq: rl->count[rw]++; if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, gfp_mask); + rq = blk_alloc_request(q, rw, gfp_mask); if (!rq) { /* * Allocation failed presumably due to memory. Undo anything @@ -1705,17 +1687,11 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) goto out; } - if (ioc_batching(ioc)) + if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; INIT_LIST_HEAD(&rq->queuelist); - /* - * first three bits are identical in rq->flags and bio->bi_rw, - * see bio.h and blkdev.h - */ - rq->flags = rw; - rq->errors = 0; rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; @@ -1764,7 +1740,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) * See ioc_batching, ioc_set_batching */ ioc = get_io_context(GFP_NOIO); - ioc_set_batching(ioc); + ioc_set_batching(q, ioc); put_io_context(ioc); } finish_wait(&rl->wait[rw], &wait); @@ -2506,6 +2482,70 @@ static inline void blk_partition_remap(struct bio *bio) } } +void blk_finish_queue_drain(request_queue_t *q) +{ + struct request_list *rl = &q->rq; + + clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); + wake_up(&rl->wait[0]); + wake_up(&rl->wait[1]); + wake_up(&rl->drain); +} + +/* + * We rely on the fact that only requests allocated through blk_alloc_request() + * have io scheduler private data structures associated with them. 
Any other + * type of request (allocated on stack or through kmalloc()) should not go + * to the io scheduler core, but be attached to the queue head instead. + */ +void blk_wait_queue_drained(request_queue_t *q) +{ + struct request_list *rl = &q->rq; + DEFINE_WAIT(wait); + + spin_lock_irq(q->queue_lock); + set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); + + while (rl->count[READ] || rl->count[WRITE]) { + prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); + + if (rl->count[READ] || rl->count[WRITE]) { + __generic_unplug_device(q); + spin_unlock_irq(q->queue_lock); + io_schedule(); + spin_lock_irq(q->queue_lock); + } + + finish_wait(&rl->drain, &wait); + } + + spin_unlock_irq(q->queue_lock); +} + +/* + * block waiting for the io scheduler being started again. + */ +static inline void block_wait_queue_running(request_queue_t *q) +{ + DEFINE_WAIT(wait); + + while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { + struct request_list *rl = &q->rq; + + prepare_to_wait_exclusive(&rl->drain, &wait, + TASK_UNINTERRUPTIBLE); + + /* + * re-check the condition. avoids using prepare_to_wait() + * in the fast path (queue is running) + */ + if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) + io_schedule(); + + finish_wait(&rl->drain, &wait); + } +} + /** * generic_make_request: hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. @@ -2595,6 +2635,8 @@ end_io: if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) goto end_io; + block_wait_queue_running(q); + /* * If this device has partitions, remap block n * of partition p to block n+start(p) of the disk. @@ -3018,6 +3060,7 @@ void kblockd_flush(void) { flush_workqueue(kblockd_workqueue); } +EXPORT_SYMBOL(kblockd_flush); int __init blk_dev_init(void) { @@ -3036,6 +3079,7 @@ int __init blk_dev_init(void) blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; + return 0; } @@ -3052,9 +3096,13 @@ void put_io_context(struct io_context *ioc) if (atomic_dec_and_test(&ioc->refcount)) { if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); + if (ioc->cic && ioc->cic->dtor) + ioc->cic->dtor(ioc->cic); + kmem_cache_free(iocontext_cachep, ioc); } } +EXPORT_SYMBOL(put_io_context); /* Called by the exitting task */ void exit_io_context(void) @@ -3064,14 +3112,15 @@ void exit_io_context(void) local_irq_save(flags); ioc = current->io_context; - if (ioc) { - if (ioc->aic && ioc->aic->exit) - ioc->aic->exit(ioc->aic); - put_io_context(ioc); - current->io_context = NULL; - } else - WARN_ON(1); + current->io_context = NULL; local_irq_restore(flags); + + if (ioc->aic && ioc->aic->exit) + ioc->aic->exit(ioc->aic); + if (ioc->cic && ioc->cic->exit) + ioc->cic->exit(ioc->cic); + + put_io_context(ioc); } /* @@ -3090,22 +3139,42 @@ struct io_context *get_io_context(int gfp_flags) local_irq_save(flags); ret = tsk->io_context; - if (ret == NULL) { - ret = kmem_cache_alloc(iocontext_cachep, GFP_ATOMIC); - if (ret) { - atomic_set(&ret->refcount, 1); - ret->pid = tsk->pid; - ret->last_waited = jiffies; /* doesn't matter... */ - ret->nr_batch_requests = 0; /* because this is 0 */ - ret->aic = NULL; + if (ret) + goto out; + + local_irq_restore(flags); + + ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); + if (ret) { + atomic_set(&ret->refcount, 1); + ret->pid = tsk->pid; + ret->last_waited = jiffies; /* doesn't matter... 
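
blk_wait_queue_drained() above sets a drain flag so no new requests are allocated, then sleeps until the read and write counts both reach zero, with freed_request() waking the sleeper when the last request completes. A userspace analog of that drain handshake, using a mutex and condition variable in place of the kernel wait queue and a single "completer" thread standing in for the driver finishing requests.

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drain = PTHREAD_COND_INITIALIZER;
static int in_flight = 3;	/* pretend three requests are outstanding */
static int draining;

static void *completer(void *arg)
{
	while (1) {
		pthread_mutex_lock(&lock);
		if (in_flight == 0) {
			pthread_mutex_unlock(&lock);
			break;
		}
		if (--in_flight == 0 && draining)
			pthread_cond_signal(&drain);	/* like freed_request() */
		pthread_mutex_unlock(&lock);
		usleep(1000);
	}
	return NULL;
}

static void wait_queue_drained(void)
{
	pthread_mutex_lock(&lock);
	draining = 1;			/* no new allocations from now on */
	while (in_flight > 0)
		pthread_cond_wait(&drain, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completer, NULL);
	wait_queue_drained();
	printf("queue drained, safe to switch schedulers\n");
	pthread_join(t, NULL);
	return 0;
}
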
*/ + ret->nr_batch_requests = 0; /* because this is 0 */ + ret->aic = NULL; + ret->cic = NULL; + spin_lock_init(&ret->lock); + + local_irq_save(flags); + + /* + * very unlikely, someone raced with us in setting up the task + * io context. free new context and just grab a reference. + */ + if (!tsk->io_context) tsk->io_context = ret; + else { + kmem_cache_free(iocontext_cachep, ret); + ret = tsk->io_context; } - } - if (ret) + +out: atomic_inc(&ret->refcount); - local_irq_restore(flags); + local_irq_restore(flags); + } + return ret; } +EXPORT_SYMBOL(get_io_context); void copy_io_context(struct io_context **pdst, struct io_context **psrc) { @@ -3119,6 +3188,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc) *pdst = src; } } +EXPORT_SYMBOL(copy_io_context); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) { @@ -3127,7 +3197,7 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) *ioc1 = *ioc2; *ioc2 = temp; } - +EXPORT_SYMBOL(swap_io_context); /* * sysfs parts below @@ -3285,11 +3355,18 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = { .show = queue_max_hw_sectors_show, }; +static struct queue_sysfs_entry queue_iosched_entry = { + .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, + .show = elv_iosched_show, + .store = elv_iosched_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, + &queue_iosched_entry.attr, NULL, }; diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c index ffef40be1f92..707dddd7d881 100644 --- a/drivers/block/noop-iosched.c +++ b/drivers/block/noop-iosched.c @@ -83,12 +83,31 @@ struct request *elevator_noop_next_request(request_queue_t *q) return NULL; } -elevator_t elevator_noop = { - .elevator_merge_fn = elevator_noop_merge, - .elevator_merge_req_fn = elevator_noop_merge_requests, - .elevator_next_req_fn = elevator_noop_next_request, - .elevator_add_req_fn = elevator_noop_add_request, - .elevator_name = "noop", +static struct elevator_type elevator_noop = { + .ops = { + .elevator_merge_fn = elevator_noop_merge, + .elevator_merge_req_fn = elevator_noop_merge_requests, + .elevator_next_req_fn = elevator_noop_next_request, + .elevator_add_req_fn = elevator_noop_add_request, + }, + .elevator_name = "noop", + .elevator_owner = THIS_MODULE, }; -EXPORT_SYMBOL(elevator_noop); +int noop_init(void) +{ + return elv_register(&elevator_noop); +} + +void noop_exit(void) +{ + elv_unregister(&elevator_noop); +} + +module_init(noop_init); +module_exit(noop_exit); + + +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("No-op IO scheduler"); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c new file mode 100644 index 000000000000..fb80b6a91f84 --- /dev/null +++ b/drivers/block/pktcdvd.c @@ -0,0 +1,2679 @@ +/* + * Copyright (C) 2000 Jens Axboe <axboe@suse.de> + * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com> + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices (aka an exercise in block layer masturbation) + * + * + * TODO: (circa order of when I will fix it) + * - Only able to write on CD-RW media right now. 
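
get_io_context() above now allocates the context with interrupts enabled and only afterwards re-checks, with interrupts off, whether the task raced and already has one, freeing the fresh allocation in that case. A small sketch of that allocate-then-recheck pattern, with a mutex standing in for the irq-disabled section and a plain int for the atomic refcount.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct io_context {
	int refcount;
};

static struct io_context *task_ioc;	/* stands in for tsk->io_context */
static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;

static struct io_context *get_io_context(void)
{
	struct io_context *ret;

	pthread_mutex_lock(&task_lock);
	ret = task_ioc;
	if (ret)
		goto out;
	pthread_mutex_unlock(&task_lock);

	/* allocate outside the critical section */
	ret = calloc(1, sizeof(*ret));
	if (!ret)
		return NULL;
	ret->refcount = 1;

	pthread_mutex_lock(&task_lock);
	if (!task_ioc) {
		task_ioc = ret;		/* we won: install our context */
	} else {
		free(ret);		/* raced: somebody else set it up */
		ret = task_ioc;
	}
out:
	ret->refcount++;		/* reference for the caller */
	pthread_mutex_unlock(&task_lock);
	return ret;
}

int main(void)
{
	struct io_context *a = get_io_context();
	struct io_context *b = get_io_context();

	printf("same context: %s, refcount=%d\n",
	       a == b ? "yes" : "no", a->refcount);
	free(a);
	return 0;
}
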
+ * - check host application code on media and set it in write page + * - interface for UDF <-> packet to negotiate a new location when a write + * fails. + * - handle OPC, especially for -RW media + * + * Theory of operation: + * + * We use a custom make_request_fn function that forwards reads directly to + * the underlying CD device. Write requests are either attached directly to + * a live packet_data object, or simply stored sequentially in a list for + * later processing by the kcdrwd kernel thread. This driver doesn't use + * any elevator functionally as defined by the elevator_s struct, but the + * underlying CD device uses a standard elevator. + * + * This strategy makes it possible to do very late merging of IO requests. + * A new bio sent to pkt_make_request can be merged with a live packet_data + * object even if the object is in the data gathering state. + * + *************************************************************************/ + +#define VERSION_CODE "v0.2.0a 2004-07-14 Jens Axboe (axboe@suse.de) and petero2@telia.com" + +#include <linux/pktcdvd.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/file.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/miscdevice.h> +#include <linux/suspend.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_ioctl.h> + +#include <asm/uaccess.h> + +#if PACKET_DEBUG +#define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) +#else +#define DPRINTK(fmt, args...) +#endif + +#if PACKET_DEBUG > 1 +#define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) +#else +#define VPRINTK(fmt, args...) +#endif + +#define MAX_SPEED 0xffff + +#define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1)) + +static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; +static struct proc_dir_entry *pkt_proc; +static int pkt_major; +static struct semaphore ctl_mutex; /* Serialize open/close/setup/teardown */ +static mempool_t *psd_pool; + + +static void pkt_bio_finished(struct pktcdvd_device *pd) +{ + BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); + if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { + VPRINTK("pktcdvd: queue empty\n"); + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); + } +} + +static void pkt_bio_destructor(struct bio *bio) +{ + kfree(bio->bi_io_vec); + kfree(bio); +} + +static struct bio *pkt_bio_alloc(int nr_iovecs) +{ + struct bio_vec *bvl = NULL; + struct bio *bio; + + bio = kmalloc(sizeof(struct bio), GFP_KERNEL); + if (!bio) + goto no_bio; + bio_init(bio); + + bvl = kmalloc(nr_iovecs * sizeof(struct bio_vec), GFP_KERNEL); + if (!bvl) + goto no_bvl; + memset(bvl, 0, nr_iovecs * sizeof(struct bio_vec)); + + bio->bi_max_vecs = nr_iovecs; + bio->bi_io_vec = bvl; + bio->bi_destructor = pkt_bio_destructor; + + return bio; + + no_bvl: + kfree(bio); + no_bio: + return NULL; +} + +/* + * Allocate a packet_data struct + */ +static struct packet_data *pkt_alloc_packet_data(void) +{ + int i; + struct packet_data *pkt; + + pkt = kmalloc(sizeof(struct packet_data), GFP_KERNEL); + if (!pkt) + goto no_pkt; + memset(pkt, 0, sizeof(struct packet_data)); + + pkt->w_bio = pkt_bio_alloc(PACKET_MAX_SIZE); + if (!pkt->w_bio) + goto no_bio; + + for (i = 0; i < PAGES_PER_PACKET; i++) { + pkt->pages[i] = alloc_page(GFP_KERNEL); + if (!pkt->pages[i]) + goto no_page; + } + for (i = 0; i < PAGES_PER_PACKET; i++) + 
clear_page(page_address(pkt->pages[i])); + + spin_lock_init(&pkt->lock); + + for (i = 0; i < PACKET_MAX_SIZE; i++) { + struct bio *bio = pkt_bio_alloc(1); + if (!bio) + goto no_rd_bio; + pkt->r_bios[i] = bio; + } + + return pkt; + +no_rd_bio: + for (i = 0; i < PACKET_MAX_SIZE; i++) { + struct bio *bio = pkt->r_bios[i]; + if (bio) + bio_put(bio); + } + +no_page: + for (i = 0; i < PAGES_PER_PACKET; i++) + if (pkt->pages[i]) + __free_page(pkt->pages[i]); + bio_put(pkt->w_bio); +no_bio: + kfree(pkt); +no_pkt: + return NULL; +} + +/* + * Free a packet_data struct + */ +static void pkt_free_packet_data(struct packet_data *pkt) +{ + int i; + + for (i = 0; i < PACKET_MAX_SIZE; i++) { + struct bio *bio = pkt->r_bios[i]; + if (bio) + bio_put(bio); + } + for (i = 0; i < PAGES_PER_PACKET; i++) + __free_page(pkt->pages[i]); + bio_put(pkt->w_bio); + kfree(pkt); +} + +static void pkt_shrink_pktlist(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + BUG_ON(!list_empty(&pd->cdrw.pkt_active_list)); + + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) { + pkt_free_packet_data(pkt); + } +} + +static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) +{ + struct packet_data *pkt; + + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); + INIT_LIST_HEAD(&pd->cdrw.pkt_active_list); + spin_lock_init(&pd->cdrw.active_list_lock); + while (nr_packets > 0) { + pkt = pkt_alloc_packet_data(); + if (!pkt) { + pkt_shrink_pktlist(pd); + return 0; + } + pkt->id = nr_packets; + pkt->pd = pd; + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + nr_packets--; + } + return 1; +} + +static void *pkt_rb_alloc(int gfp_mask, void *data) +{ + return kmalloc(sizeof(struct pkt_rb_node), gfp_mask); +} + +static void pkt_rb_free(void *ptr, void *data) +{ + kfree(ptr); +} + +static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) +{ + struct rb_node *n = rb_next(&node->rb_node); + if (!n) + return NULL; + return rb_entry(n, struct pkt_rb_node, rb_node); +} + +static inline void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + rb_erase(&node->rb_node, &pd->bio_queue); + mempool_free(node, pd->rb_pool); + pd->bio_queue_size--; + BUG_ON(pd->bio_queue_size < 0); +} + +/* + * Find the first node in the pd->bio_queue rb tree with a starting sector >= s. + */ +static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s) +{ + struct rb_node *n = pd->bio_queue.rb_node; + struct rb_node *next; + struct pkt_rb_node *tmp; + + if (!n) { + BUG_ON(pd->bio_queue_size > 0); + return NULL; + } + + for (;;) { + tmp = rb_entry(n, struct pkt_rb_node, rb_node); + if (s <= tmp->bio->bi_sector) + next = n->rb_left; + else + next = n->rb_right; + if (!next) + break; + n = next; + } + + if (s > tmp->bio->bi_sector) { + tmp = pkt_rbtree_next(tmp); + if (!tmp) + return NULL; + } + BUG_ON(s > tmp->bio->bi_sector); + return tmp; +} + +/* + * Insert a node into the pd->bio_queue rb tree. 
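
pkt_rbtree_find() above looks up the first queued bio whose starting sector is at or after a given sector, stepping to rb_next() if the descent lands on a smaller key. The sketch below gets the same answer on a plain unbalanced binary search tree by remembering the best candidate during the descent instead; node layout and sector values are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct node {
	long sector;			/* key: starting sector of a queued bio */
	struct node *left, *right;
};

static struct node *insert(struct node *root, long sector)
{
	if (!root) {
		struct node *n = calloc(1, sizeof(*n));
		n->sector = sector;
		return n;
	}
	if (sector < root->sector)
		root->left = insert(root->left, sector);
	else
		root->right = insert(root->right, sector);
	return root;
}

/* first node whose sector is >= s, or NULL if none */
static struct node *find_ge(struct node *root, long s)
{
	struct node *best = NULL;

	while (root) {
		if (s <= root->sector) {
			best = root;	/* candidate; look for a smaller one */
			root = root->left;
		} else {
			root = root->right;
		}
	}
	return best;
}

int main(void)
{
	long sectors[] = { 96, 32, 160, 64 };
	struct node *root = NULL, *n;
	int i;

	for (i = 0; i < 4; i++)
		root = insert(root, sectors[i]);

	n = find_ge(root, 50);
	printf("first queued bio at or after sector 50: %ld\n",
	       n ? n->sector : -1);	/* prints 64 */
	return 0;
}
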
+ */ +static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + struct rb_node **p = &pd->bio_queue.rb_node; + struct rb_node *parent = NULL; + sector_t s = node->bio->bi_sector; + struct pkt_rb_node *tmp; + + while (*p) { + parent = *p; + tmp = rb_entry(parent, struct pkt_rb_node, rb_node); + if (s < tmp->bio->bi_sector) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, &pd->bio_queue); + pd->bio_queue_size++; +} + +/* + * Add a bio to a single linked list defined by its head and tail pointers. + */ +static inline void pkt_add_list_last(struct bio *bio, struct bio **list_head, struct bio **list_tail) +{ + bio->bi_next = NULL; + if (*list_tail) { + BUG_ON((*list_head) == NULL); + (*list_tail)->bi_next = bio; + (*list_tail) = bio; + } else { + BUG_ON((*list_head) != NULL); + (*list_head) = bio; + (*list_tail) = bio; + } +} + +/* + * Remove and return the first bio from a single linked list defined by its + * head and tail pointers. + */ +static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio **list_tail) +{ + struct bio *bio; + + if (*list_head == NULL) + return NULL; + + bio = *list_head; + *list_head = bio->bi_next; + if (*list_head == NULL) + *list_tail = NULL; + + bio->bi_next = NULL; + return bio; +} + +/* + * Send a packet_command to the underlying block device and + * wait for completion. + */ +static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + char sense[SCSI_SENSE_BUFFERSIZE]; + request_queue_t *q; + struct request *rq; + DECLARE_COMPLETION(wait); + int err = 0; + + q = bdev_get_queue(pd->bdev); + + rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ, + __GFP_WAIT); + rq->errors = 0; + rq->rq_disk = pd->bdev->bd_disk; + rq->bio = NULL; + rq->buffer = NULL; + rq->timeout = 60*HZ; + rq->data = cgc->buffer; + rq->data_len = cgc->buflen; + rq->sense = sense; + memset(sense, 0, sizeof(sense)); + rq->sense_len = 0; + rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER; + if (cgc->quiet) + rq->flags |= REQ_QUIET; + memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); + if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) + memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); + + rq->ref_count++; + rq->flags |= REQ_NOMERGE; + rq->waiting = &wait; + elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); + generic_unplug_device(q); + wait_for_completion(&wait); + + if (rq->errors) + err = -EIO; + + blk_put_request(rq); + return err; +} + +/* + * A generic sense dump / resolve mechanism should be implemented across + * all ATAPI + SCSI devices. 
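
pkt_add_list_last() and pkt_get_list_first() above keep a FIFO of bios as a singly linked list tracked by separate head and tail pointers, so both appending and popping are O(1). A standalone sketch of the same structure with a toy payload in place of struct bio.

#include <stdio.h>
#include <stdlib.h>

struct bio_stub {
	int id;
	struct bio_stub *next;
};

static void add_list_last(struct bio_stub *bio, struct bio_stub **head,
			  struct bio_stub **tail)
{
	bio->next = NULL;
	if (*tail) {
		(*tail)->next = bio;
		*tail = bio;
	} else {
		*head = *tail = bio;
	}
}

static struct bio_stub *get_list_first(struct bio_stub **head,
				       struct bio_stub **tail)
{
	struct bio_stub *bio = *head;

	if (!bio)
		return NULL;
	*head = bio->next;
	if (!*head)
		*tail = NULL;		/* list is empty again */
	bio->next = NULL;
	return bio;
}

int main(void)
{
	struct bio_stub *head = NULL, *tail = NULL, *b;
	int i;

	for (i = 1; i <= 3; i++) {
		b = malloc(sizeof(*b));
		b->id = i;
		add_list_last(b, &head, &tail);
	}
	while ((b = get_list_first(&head, &tail)) != NULL) {
		printf("popped bio %d\n", b->id);	/* 1, 2, 3 in order */
		free(b);
	}
	return 0;
}
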
+ */ +static void pkt_dump_sense(struct packet_command *cgc) +{ + static char *info[9] = { "No sense", "Recovered error", "Not ready", + "Medium error", "Hardware error", "Illegal request", + "Unit attention", "Data protect", "Blank check" }; + int i; + struct request_sense *sense = cgc->sense; + + printk("pktcdvd:"); + for (i = 0; i < CDROM_PACKET_SIZE; i++) + printk(" %02x", cgc->cmd[i]); + printk(" - "); + + if (sense == NULL) { + printk("no sense\n"); + return; + } + + printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq); + + if (sense->sense_key > 8) { + printk(" (INVALID)\n"); + return; + } + + printk(" (%s)\n", info[sense->sense_key]); +} + +/* + * flush the drive cache to media + */ +static int pkt_flush_cache(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_FLUSH_CACHE; + cgc.quiet = 1; + + /* + * the IMMED bit -- we default to not setting it, although that + * would allow a much faster close, this is safer + */ +#if 0 + cgc.cmd[1] = 1 << 1; +#endif + return pkt_generic_packet(pd, &cgc); +} + +/* + * speed is given as the normal factor, e.g. 4 for 4x + */ +static int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsigned read_speed) +{ + struct packet_command cgc; + struct request_sense sense; + int ret; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sense = &sense; + cgc.cmd[0] = GPCMD_SET_SPEED; + cgc.cmd[2] = (read_speed >> 8) & 0xff; + cgc.cmd[3] = read_speed & 0xff; + cgc.cmd[4] = (write_speed >> 8) & 0xff; + cgc.cmd[5] = write_speed & 0xff; + + if ((ret = pkt_generic_packet(pd, &cgc))) + pkt_dump_sense(&cgc); + + return ret; +} + +/* + * Queue a bio for processing by the low-level CD device. Must be called + * from process context. + */ +static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_prio_read) +{ + spin_lock(&pd->iosched.lock); + if (bio_data_dir(bio) == READ) { + pkt_add_list_last(bio, &pd->iosched.read_queue, + &pd->iosched.read_queue_tail); + if (high_prio_read) + pd->iosched.high_prio_read = 1; + } else { + pkt_add_list_last(bio, &pd->iosched.write_queue, + &pd->iosched.write_queue_tail); + } + spin_unlock(&pd->iosched.lock); + + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); +} + +/* + * Process the queued read/write requests. This function handles special + * requirements for CDRW drives: + * - A cache flush command must be inserted before a read request if the + * previous request was a write. + * - Switching between reading and writing is slow, so don't it more often + * than necessary. + * - Set the read speed according to current usage pattern. When only reading + * from the device, it's best to use the highest possible read speed, but + * when switching often between reading and writing, it's better to have the + * same read and write speeds. + * - Reads originating from user space should have higher priority than reads + * originating from pkt_gather_data, because some process is usually waiting + * on reads of the first kind. 
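
pkt_set_speed() above splits each 16-bit speed value across two command bytes, most significant byte first (cmd[2]/cmd[3] for the read speed, cmd[4]/cmd[5] for the write speed). A tiny sketch of that packing; the speed values below are purely illustrative.

#include <stdio.h>

/* place a 16-bit value into cmd[] MSB first, as pkt_set_speed() does */
static void put_be16(unsigned char *cmd, unsigned int val)
{
	cmd[0] = (val >> 8) & 0xff;
	cmd[1] = val & 0xff;
}

int main(void)
{
	unsigned char cmd[12] = { 0 };
	unsigned int read_speed = 0xffff;	/* MAX_SPEED, i.e. no limit */
	unsigned int write_speed = 706;		/* illustrative value */

	put_be16(&cmd[2], read_speed);
	put_be16(&cmd[4], write_speed);
	printf("cmd[2..5] = %02x %02x %02x %02x\n",
	       cmd[2], cmd[3], cmd[4], cmd[5]);
	return 0;
}
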
+ */ +static void pkt_iosched_process_queue(struct pktcdvd_device *pd) +{ + request_queue_t *q; + + if (atomic_read(&pd->iosched.attention) == 0) + return; + atomic_set(&pd->iosched.attention, 0); + + q = bdev_get_queue(pd->bdev); + + for (;;) { + struct bio *bio; + int reads_queued, writes_queued, high_prio_read; + + spin_lock(&pd->iosched.lock); + reads_queued = (pd->iosched.read_queue != NULL); + writes_queued = (pd->iosched.write_queue != NULL); + if (!reads_queued) + pd->iosched.high_prio_read = 0; + high_prio_read = pd->iosched.high_prio_read; + spin_unlock(&pd->iosched.lock); + + if (!reads_queued && !writes_queued) + break; + + if (pd->iosched.writing) { + if (high_prio_read || (!writes_queued && reads_queued)) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + VPRINTK("pktcdvd: write, waiting\n"); + break; + } + pkt_flush_cache(pd); + pd->iosched.writing = 0; + } + } else { + if (!reads_queued && writes_queued) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + VPRINTK("pktcdvd: read, waiting\n"); + break; + } + pd->iosched.writing = 1; + } + } + + spin_lock(&pd->iosched.lock); + if (pd->iosched.writing) { + bio = pkt_get_list_first(&pd->iosched.write_queue, + &pd->iosched.write_queue_tail); + } else { + bio = pkt_get_list_first(&pd->iosched.read_queue, + &pd->iosched.read_queue_tail); + } + spin_unlock(&pd->iosched.lock); + + if (!bio) + continue; + + if (bio_data_dir(bio) == READ) + pd->iosched.successive_reads += bio->bi_size >> 10; + else + pd->iosched.successive_reads = 0; + if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { + if (pd->read_speed == pd->write_speed) { + pd->read_speed = MAX_SPEED; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } else { + if (pd->read_speed != pd->write_speed) { + pd->read_speed = pd->write_speed; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } + + atomic_inc(&pd->cdrw.pending_bios); + generic_make_request(bio); + } +} + +/* + * Special care is needed if the underlying block device has a small + * max_phys_segments value. + */ +static int pkt_set_segment_merging(struct pktcdvd_device *pd, request_queue_t *q) +{ + if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) { + /* + * The cdrom device can handle one segment/frame + */ + clear_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) { + /* + * We can handle this case at the expense of some extra memory + * copies during write operations + */ + set_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else { + printk("pktcdvd: cdrom max_phys_segments too small\n"); + return -EIO; + } +} + +/* + * Copy CD_FRAMESIZE bytes from src_bio into a destination page + */ +static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, + struct page *dst_page, int dst_offs) +{ + unsigned int copy_size = CD_FRAMESIZE; + + while (copy_size > 0) { + struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); + void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + + src_bvl->bv_offset + offs; + void *vto = page_address(dst_page) + dst_offs; + int len = min_t(int, copy_size, src_bvl->bv_len - offs); + + BUG_ON(len < 0); + memcpy(vto, vfrom, len); + kunmap_atomic(src_bvl->bv_page, KM_USER0); + + seg++; + offs = 0; + dst_offs += len; + copy_size -= len; + } +} + +/* + * Copy all data for this packet to pkt->pages[], so that + * a) The number of required segments for the write bio is minimized, which + * is necessary for some scsi controllers. 
+ * b) The data can be used as cache to avoid read requests if we receive a + * new write request for the same zone. + */ +static void pkt_make_local_copy(struct packet_data *pkt, struct page **pages, int *offsets) +{ + int f, p, offs; + + /* Copy all data to pkt->pages[] */ + p = 0; + offs = 0; + for (f = 0; f < pkt->frames; f++) { + if (pages[f] != pkt->pages[p]) { + void *vfrom = kmap_atomic(pages[f], KM_USER0) + offsets[f]; + void *vto = page_address(pkt->pages[p]) + offs; + memcpy(vto, vfrom, CD_FRAMESIZE); + kunmap_atomic(pages[f], KM_USER0); + pages[f] = pkt->pages[p]; + offsets[f] = offs; + } else { + BUG_ON(offsets[f] != offs); + } + offs += CD_FRAMESIZE; + if (offs >= PAGE_SIZE) { + BUG_ON(offs > PAGE_SIZE); + offs = 0; + p++; + } + } +} + +static int pkt_end_io_read(struct bio *bio, unsigned int bytes_done, int err) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + if (bio->bi_size) + return 1; + + VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio, + (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err); + + if (err) + atomic_inc(&pkt->io_errors); + if (atomic_dec_and_test(&pkt->io_wait)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + pkt_bio_finished(pd); + + return 0; +} + +static int pkt_end_io_packet_write(struct bio *bio, unsigned int bytes_done, int err) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + if (bio->bi_size) + return 1; + + VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err); + + pd->stats.pkt_ended++; + + pkt_bio_finished(pd); + atomic_dec(&pkt->io_wait); + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + return 0; +} + +/* + * Schedule reads for the holes in a packet + */ +static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int frames_read = 0; + struct bio *bio; + int f; + char written[PACKET_MAX_SIZE]; + + BUG_ON(!pkt->orig_bios); + + atomic_set(&pkt->io_wait, 0); + atomic_set(&pkt->io_errors, 0); + + if (pkt->cache_valid) { + VPRINTK("pkt_gather_data: zone %llx cached\n", + (unsigned long long)pkt->sector); + goto out_account; + } + + /* + * Figure out which frames we need to read before we can write. + */ + memset(written, 0, sizeof(written)); + spin_lock(&pkt->lock); + for (bio = pkt->orig_bios; bio; bio = bio->bi_next) { + int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9); + int num_frames = bio->bi_size / CD_FRAMESIZE; + BUG_ON(first_frame < 0); + BUG_ON(first_frame + num_frames > pkt->frames); + for (f = first_frame; f < first_frame + num_frames; f++) + written[f] = 1; + } + spin_unlock(&pkt->lock); + + /* + * Schedule reads for missing parts of the packet. 
+ */ + for (f = 0; f < pkt->frames; f++) { + int p, offset; + if (written[f]) + continue; + bio = pkt->r_bios[f]; + bio_init(bio); + bio->bi_max_vecs = 1; + bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); + bio->bi_bdev = pd->bdev; + bio->bi_end_io = pkt_end_io_read; + bio->bi_private = pkt; + + p = (f * CD_FRAMESIZE) / PAGE_SIZE; + offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n", + f, pkt->pages[p], offset); + if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) + BUG(); + + atomic_inc(&pkt->io_wait); + bio->bi_rw = READ; + pkt_queue_bio(pd, bio, 0); + frames_read++; + } + +out_account: + VPRINTK("pkt_gather_data: need %d frames for zone %llx\n", + frames_read, (unsigned long long)pkt->sector); + pd->stats.pkt_started++; + pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); + pd->stats.secs_w += pd->settings.size; +} + +/* + * Find a packet matching zone, or the least recently used packet if + * there is no match. + */ +static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone) +{ + struct packet_data *pkt; + + list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) { + if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) { + list_del_init(&pkt->list); + if (pkt->sector != zone) + pkt->cache_valid = 0; + break; + } + } + return pkt; +} + +static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + if (pkt->cache_valid) { + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + } else { + list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list); + } +} + +/* + * recover a failed write, query for relocation if possible + * + * returns 1 if recovery is possible, or 0 if not + * + */ +static int pkt_start_recovery(struct packet_data *pkt) +{ + /* + * FIXME. We need help from the file system to implement + * recovery handling. + */ + return 0; +#if 0 + struct request *rq = pkt->rq; + struct pktcdvd_device *pd = rq->rq_disk->private_data; + struct block_device *pkt_bdev; + struct super_block *sb = NULL; + unsigned long old_block, new_block; + sector_t new_sector; + + pkt_bdev = bdget(kdev_t_to_nr(pd->pkt_dev)); + if (pkt_bdev) { + sb = get_super(pkt_bdev); + bdput(pkt_bdev); + } + + if (!sb) + return 0; + + if (!sb->s_op || !sb->s_op->relocate_blocks) + goto out; + + old_block = pkt->sector / (CD_FRAMESIZE >> 9); + if (sb->s_op->relocate_blocks(sb, old_block, &new_block)) + goto out; + + new_sector = new_block * (CD_FRAMESIZE >> 9); + pkt->sector = new_sector; + + pkt->bio->bi_sector = new_sector; + pkt->bio->bi_next = NULL; + pkt->bio->bi_flags = 1 << BIO_UPTODATE; + pkt->bio->bi_idx = 0; + + BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW)); + BUG_ON(pkt->bio->bi_vcnt != pkt->frames); + BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE); + BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write); + BUG_ON(pkt->bio->bi_private != pkt); + + drop_super(sb); + return 1; + +out: + drop_super(sb); + return 0; +#endif +} + +static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) +{ +#if PACKET_DEBUG > 1 + static const char *state_name[] = { + "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" + }; + enum packet_data_state old_state = pkt->state; + VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector, + state_name[old_state], state_name[state]); +#endif + pkt->state = state; +} + +/* + * Scan the work queue to see if we can start a new packet. + * returns non-zero if any work was done. 
+ */ +static int pkt_handle_queue(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *p; + struct bio *bio = NULL; + sector_t zone = 0; /* Suppress gcc warning */ + struct pkt_rb_node *node, *first_node; + struct rb_node *n; + + VPRINTK("handle_queue\n"); + + atomic_set(&pd->scan_queue, 0); + + if (list_empty(&pd->cdrw.pkt_free_list)) { + VPRINTK("handle_queue: no pkt\n"); + return 0; + } + + /* + * Try to find a zone we are not already working on. + */ + spin_lock(&pd->lock); + first_node = pkt_rbtree_find(pd, pd->current_sector); + if (!first_node) { + n = rb_first(&pd->bio_queue); + if (n) + first_node = rb_entry(n, struct pkt_rb_node, rb_node); + } + node = first_node; + while (node) { + bio = node->bio; + zone = ZONE(bio->bi_sector, pd); + list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { + if (p->sector == zone) + goto try_next_bio; + } + break; +try_next_bio: + node = pkt_rbtree_next(node); + if (!node) { + n = rb_first(&pd->bio_queue); + if (n) + node = rb_entry(n, struct pkt_rb_node, rb_node); + } + if (node == first_node) + node = NULL; + } + spin_unlock(&pd->lock); + if (!bio) { + VPRINTK("handle_queue: no bio\n"); + return 0; + } + + pkt = pkt_get_packet_data(pd, zone); + BUG_ON(!pkt); + + pd->current_sector = zone + pd->settings.size; + pkt->sector = zone; + pkt->frames = pd->settings.size >> 2; + BUG_ON(pkt->frames > PACKET_MAX_SIZE); + pkt->write_size = 0; + + /* + * Scan work queue for bios in the same zone and link them + * to this packet. + */ + spin_lock(&pd->lock); + VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone); + while ((node = pkt_rbtree_find(pd, zone)) != NULL) { + bio = node->bio; + VPRINTK("pkt_handle_queue: found zone=%llx\n", + (unsigned long long)ZONE(bio->bi_sector, pd)); + if (ZONE(bio->bi_sector, pd) != zone) + break; + pkt_rbtree_erase(pd, node); + spin_lock(&pkt->lock); + pkt_add_list_last(bio, &pkt->orig_bios, &pkt->orig_bios_tail); + pkt->write_size += bio->bi_size / CD_FRAMESIZE; + spin_unlock(&pkt->lock); + } + spin_unlock(&pd->lock); + + pkt->sleep_time = max(PACKET_WAIT_TIME, 1); + pkt_set_state(pkt, PACKET_WAITING_STATE); + atomic_set(&pkt->run_sm, 1); + + spin_lock(&pd->cdrw.active_list_lock); + list_add(&pkt->list, &pd->cdrw.pkt_active_list); + spin_unlock(&pd->cdrw.active_list_lock); + + return 1; +} + +/* + * Assemble a bio to write one packet and queue the bio for processing + * by the underlying block device. + */ +static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + struct bio *bio; + struct page *pages[PACKET_MAX_SIZE]; + int offsets[PACKET_MAX_SIZE]; + int f; + int frames_write; + + for (f = 0; f < pkt->frames; f++) { + pages[f] = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + offsets[f] = (f * CD_FRAMESIZE) % PAGE_SIZE; + } + + /* + * Fill-in pages[] and offsets[] with data from orig_bios. 
+	 */
+	frames_write = 0;
+	spin_lock(&pkt->lock);
+	for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
+		int segment = bio->bi_idx;
+		int src_offs = 0;
+		int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
+		int num_frames = bio->bi_size / CD_FRAMESIZE;
+		BUG_ON(first_frame < 0);
+		BUG_ON(first_frame + num_frames > pkt->frames);
+		for (f = first_frame; f < first_frame + num_frames; f++) {
+			struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
+
+			while (src_offs >= src_bvl->bv_len) {
+				src_offs -= src_bvl->bv_len;
+				segment++;
+				BUG_ON(segment >= bio->bi_vcnt);
+				src_bvl = bio_iovec_idx(bio, segment);
+			}
+
+			if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
+				pages[f] = src_bvl->bv_page;
+				offsets[f] = src_bvl->bv_offset + src_offs;
+			} else {
+				pkt_copy_bio_data(bio, segment, src_offs,
+						  pages[f], offsets[f]);
+			}
+			src_offs += CD_FRAMESIZE;
+			frames_write++;
+		}
+	}
+	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+	spin_unlock(&pkt->lock);
+
+	VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
+		frames_write, (unsigned long long)pkt->sector);
+	BUG_ON(frames_write != pkt->write_size);
+
+	if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
+		pkt_make_local_copy(pkt, pages, offsets);
+		pkt->cache_valid = 1;
+	} else {
+		pkt->cache_valid = 0;
+	}
+
+	/* Start the write request */
+	bio_init(pkt->w_bio);
+	pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
+	pkt->w_bio->bi_sector = pkt->sector;
+	pkt->w_bio->bi_bdev = pd->bdev;
+	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+	pkt->w_bio->bi_private = pkt;
+	for (f = 0; f < pkt->frames; f++) {
+		if ((f + 1 < pkt->frames) && (pages[f + 1] == pages[f]) &&
+		    (offsets[f + 1] == offsets[f] + CD_FRAMESIZE)) {
+			if (!bio_add_page(pkt->w_bio, pages[f], CD_FRAMESIZE * 2, offsets[f]))
+				BUG();
+			f++;
+		} else {
+			if (!bio_add_page(pkt->w_bio, pages[f], CD_FRAMESIZE, offsets[f]))
+				BUG();
+		}
+	}
+	VPRINTK("pktcdvd: vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+	atomic_set(&pkt->io_wait, 1);
+	pkt->w_bio->bi_rw = WRITE;
+	pkt_queue_bio(pd, pkt->w_bio, 0);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
+{
+	struct bio *bio, *next;
+
+	if (!uptodate)
+		pkt->cache_valid = 0;
+
+	/* Finish all bios corresponding to this packet */
+	bio = pkt->orig_bios;
+	while (bio) {
+		next = bio->bi_next;
+		bio->bi_next = NULL;
+		bio_endio(bio, bio->bi_size, uptodate ?
0 : -EIO);
+		bio = next;
+	}
+	pkt->orig_bios = pkt->orig_bios_tail = NULL;
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	int uptodate;
+
+	VPRINTK("run_state_machine: pkt %d\n", pkt->id);
+
+	for (;;) {
+		switch (pkt->state) {
+		case PACKET_WAITING_STATE:
+			if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+				return;
+
+			pkt->sleep_time = 0;
+			pkt_gather_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+			break;
+
+		case PACKET_READ_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (atomic_read(&pkt->io_errors) > 0) {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			} else {
+				pkt_start_write(pd, pkt);
+			}
+			break;
+
+		case PACKET_WRITE_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) {
+				pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			} else {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			}
+			break;
+
+		case PACKET_RECOVERY_STATE:
+			if (pkt_start_recovery(pkt)) {
+				pkt_start_write(pd, pkt);
+			} else {
+				VPRINTK("No recovery possible\n");
+				pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			}
+			break;
+
+		case PACKET_FINISHED_STATE:
+			uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags);
+			pkt_finish_packet(pkt, uptodate);
+			return;
+
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *next;
+
+	VPRINTK("pkt_handle_packets\n");
+
+	/*
+	 * Run state machine for active packets
+	 */
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		if (atomic_read(&pkt->run_sm) > 0) {
+			atomic_set(&pkt->run_sm, 0);
+			pkt_run_state_machine(pd, pkt);
+		}
+	}
+
+	/*
+	 * Move no longer active packets to the free list
+	 */
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+		if (pkt->state == PACKET_FINISHED_STATE) {
+			list_del(&pkt->list);
+			pkt_put_packet_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_IDLE_STATE);
+			atomic_set(&pd->scan_queue, 1);
+		}
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+	struct packet_data *pkt;
+	int i;
+
+	for (i = 0; i < PACKET_NUM_STATES; i++)
+		states[i] = 0;
+
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		states[pkt->state]++;
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+	struct pktcdvd_device *pd = foobar;
+	struct packet_data *pkt;
+	long min_sleep_time, residue;
+
+	set_user_nice(current, -20);
+
+	for (;;) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		/*
+		 * Wait until there is something to do
+		 */
+		add_wait_queue(&pd->wqueue, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			/* Check if we need to run pkt_handle_queue */
+			if (atomic_read(&pd->scan_queue) > 0)
+				goto work_to_do;
+
+			/* Check if we need to run the state machine for some packet */
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (atomic_read(&pkt->run_sm) > 0)
+					goto work_to_do;
+			}
+
+			/* Check if we need to process the iosched queues */
+			if (atomic_read(&pd->iosched.attention) != 0)
+				goto work_to_do;
+
+			/* Otherwise, go to sleep */
+			if (PACKET_DEBUG > 1) {
+				int states[PACKET_NUM_STATES];
+				pkt_count_states(pd, states);
+				VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+					states[0],
states[1], states[2], states[3], + states[4], states[5]); + } + + min_sleep_time = MAX_SCHEDULE_TIMEOUT; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sleep_time && pkt->sleep_time < min_sleep_time) + min_sleep_time = pkt->sleep_time; + } + + generic_unplug_device(bdev_get_queue(pd->bdev)); + + VPRINTK("kcdrwd: sleeping\n"); + residue = schedule_timeout(min_sleep_time); + VPRINTK("kcdrwd: wake up\n"); + + /* make swsusp happy with our thread */ + if (current->flags & PF_FREEZE) + refrigerator(PF_FREEZE); + + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (!pkt->sleep_time) + continue; + pkt->sleep_time -= min_sleep_time - residue; + if (pkt->sleep_time <= 0) { + pkt->sleep_time = 0; + atomic_inc(&pkt->run_sm); + } + } + + if (signal_pending(current)) { + flush_signals(current); + } + if (kthread_should_stop()) + break; + } +work_to_do: + set_current_state(TASK_RUNNING); + remove_wait_queue(&pd->wqueue, &wait); + + if (kthread_should_stop()) + break; + + /* + * if pkt_handle_queue returns true, we can queue + * another request. + */ + while (pkt_handle_queue(pd)) + ; + + /* + * Handle packet state machine + */ + pkt_handle_packets(pd); + + /* + * Handle iosched queues + */ + pkt_iosched_process_queue(pd); + } + + return 0; +} + +static void pkt_print_settings(struct pktcdvd_device *pd) +{ + printk("pktcdvd: %s packets, ", pd->settings.fp ? "Fixed" : "Variable"); + printk("%u blocks, ", pd->settings.size >> 2); + printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2'); +} + +static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, + int page_code, int page_control) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + + cgc->cmd[0] = GPCMD_MODE_SENSE_10; + cgc->cmd[2] = page_code | (page_control << 6); + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_READ; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + memset(cgc->buffer, 0, 2); + cgc->cmd[0] = GPCMD_MODE_SELECT_10; + cgc->cmd[1] = 0x10; /* PF */ + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_WRITE; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di) +{ + struct packet_command cgc; + int ret; + + /* set up command and get the disc info */ + init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_DISC_INFO; + cgc.cmd[8] = cgc.buflen = 2; + cgc.quiet = 1; + + if ((ret = pkt_generic_packet(pd, &cgc))) + return ret; + + /* not all drives have the same disc_info length, so requeue + * packet with the length the drive tells us it can supply + */ + cgc.buflen = be16_to_cpu(di->disc_information_length) + + sizeof(di->disc_information_length); + + if (cgc.buflen > sizeof(disc_information)) + cgc.buflen = sizeof(disc_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti) +{ + struct packet_command cgc; + int ret; + + init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; + cgc.cmd[1] = type & 3; + cgc.cmd[4] = (track & 0xff00) >> 8; + cgc.cmd[5] = track & 0xff; + cgc.cmd[8] = 8; + cgc.quiet = 1; + + if ((ret = pkt_generic_packet(pd, &cgc))) + return ret; + + cgc.buflen = 
be16_to_cpu(ti->track_information_length) + + sizeof(ti->track_information_length); + + if (cgc.buflen > sizeof(track_information)) + cgc.buflen = sizeof(track_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static int pkt_get_last_written(struct pktcdvd_device *pd, long *last_written) +{ + disc_information di; + track_information ti; + __u32 last_track; + int ret = -1; + + if ((ret = pkt_get_disc_info(pd, &di))) + return ret; + + last_track = (di.last_track_msb << 8) | di.last_track_lsb; + if ((ret = pkt_get_track_info(pd, last_track, 1, &ti))) + return ret; + + /* if this track is blank, try the previous. */ + if (ti.blank) { + last_track--; + if ((ret = pkt_get_track_info(pd, last_track, 1, &ti))) + return ret; + } + + /* if last recorded field is valid, return it. */ + if (ti.lra_v) { + *last_written = be32_to_cpu(ti.last_rec_address); + } else { + /* make it up instead */ + *last_written = be32_to_cpu(ti.track_start) + + be32_to_cpu(ti.track_size); + if (ti.free_blocks) + *last_written -= (be32_to_cpu(ti.free_blocks) + 7); + } + return 0; +} + +/* + * write mode select package based on pd->settings + */ +static int pkt_set_write_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct request_sense sense; + write_param_page *wp; + char buffer[128]; + int ret, size; + + /* doesn't apply to DVD+RW */ + if (pd->mmc3_profile == 0x1a) + return 0; + + memset(buffer, 0, sizeof(buffer)); + init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); + cgc.sense = &sense; + if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { + pkt_dump_sense(&cgc); + return ret; + } + + size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff)); + pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff); + if (size > sizeof(buffer)) + size = sizeof(buffer); + + /* + * now get it all + */ + init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); + cgc.sense = &sense; + if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { + pkt_dump_sense(&cgc); + return ret; + } + + /* + * write page is offset header + block descriptor length + */ + wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset]; + + wp->fp = pd->settings.fp; + wp->track_mode = pd->settings.track_mode; + wp->write_type = pd->settings.write_type; + wp->data_block_type = pd->settings.block_mode; + + wp->multi_session = 0; + +#ifdef PACKET_USE_LS + wp->link_size = 7; + wp->ls_v = 1; +#endif + + if (wp->data_block_type == PACKET_BLOCK_MODE1) { + wp->session_format = 0; + wp->subhdr2 = 0x20; + } else if (wp->data_block_type == PACKET_BLOCK_MODE2) { + wp->session_format = 0x20; + wp->subhdr2 = 8; +#if 0 + wp->mcn[0] = 0x80; + memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1); +#endif + } else { + /* + * paranoia + */ + printk("pktcdvd: write mode wrong %d\n", wp->data_block_type); + return 1; + } + wp->packet_size = cpu_to_be32(pd->settings.size >> 2); + + cgc.buflen = cgc.cmd[8] = size; + if ((ret = pkt_mode_select(pd, &cgc))) { + pkt_dump_sense(&cgc); + return ret; + } + + pkt_print_settings(pd); + return 0; +} + +/* + * 0 -- we can write to this track, 1 -- we can't + */ +static int pkt_good_track(track_information *ti) +{ + /* + * only good for CD-RW at the moment, not DVD-RW + */ + + /* + * FIXME: only for FP + */ + if (ti->fp == 0) + return 0; + + /* + * "good" settings as per Mt Fuji. 
+ */ + if (ti->rt == 0 && ti->blank == 0 && ti->packet == 1) + return 0; + + if (ti->rt == 0 && ti->blank == 1 && ti->packet == 1) + return 0; + + if (ti->rt == 1 && ti->blank == 0 && ti->packet == 1) + return 0; + + printk("pktcdvd: bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); + return 1; +} + +/* + * 0 -- we can write to this disc, 1 -- we can't + */ +static int pkt_good_disc(struct pktcdvd_device *pd, disc_information *di) +{ + switch (pd->mmc3_profile) { + case 0x0a: /* CD-RW */ + case 0xffff: /* MMC3 not supported */ + break; + case 0x1a: /* DVD+RW */ + case 0x13: /* DVD-RW */ + return 0; + default: + printk("pktcdvd: Wrong disc profile (%x)\n", pd->mmc3_profile); + return 1; + } + + /* + * for disc type 0xff we should probably reserve a new track. + * but i'm not sure, should we leave this to user apps? probably. + */ + if (di->disc_type == 0xff) { + printk("pktcdvd: Unknown disc. No track?\n"); + return 1; + } + + if (di->disc_type != 0x20 && di->disc_type != 0) { + printk("pktcdvd: Wrong disc type (%x)\n", di->disc_type); + return 1; + } + + if (di->erasable == 0) { + printk("pktcdvd: Disc not erasable\n"); + return 1; + } + + if (di->border_status == PACKET_SESSION_RESERVED) { + printk("pktcdvd: Can't write to last track (reserved)\n"); + return 1; + } + + return 0; +} + +static int pkt_probe_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + unsigned char buf[12]; + disc_information di; + track_information ti; + int ret, track; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_GET_CONFIGURATION; + cgc.cmd[8] = 8; + ret = pkt_generic_packet(pd, &cgc); + pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7]; + + memset(&di, 0, sizeof(disc_information)); + memset(&ti, 0, sizeof(track_information)); + + if ((ret = pkt_get_disc_info(pd, &di))) { + printk("failed get_disc\n"); + return ret; + } + + if (pkt_good_disc(pd, &di)) + return -ENXIO; + + switch (pd->mmc3_profile) { + case 0x1a: /* DVD+RW */ + printk("pktcdvd: inserted media is DVD+RW\n"); + break; + case 0x13: /* DVD-RW */ + printk("pktcdvd: inserted media is DVD-RW\n"); + break; + default: + printk("pktcdvd: inserted media is CD-R%s\n", di.erasable ? "W" : ""); + break; + } + pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR; + + track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ + if ((ret = pkt_get_track_info(pd, track, 1, &ti))) { + printk("pktcdvd: failed get_track\n"); + return ret; + } + + if (pkt_good_track(&ti)) { + printk("pktcdvd: can't write to this track\n"); + return -ENXIO; + } + + /* + * we keep packet size in 512 byte units, makes it easier to + * deal with request calculations. + */ + pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; + if (pd->settings.size == 0) { + printk("pktcdvd: detected zero packet size!\n"); + pd->settings.size = 128; + } + pd->settings.fp = ti.fp; + pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1); + + if (ti.nwa_v) { + pd->nwa = be32_to_cpu(ti.next_writable); + set_bit(PACKET_NWA_VALID, &pd->flags); + } + + /* + * in theory we could use lra on -RW media as well and just zero + * blocks that haven't been written yet, but in practice that + * is just a no-go. we'll use that for -R, naturally. 
+ */ + if (ti.lra_v) { + pd->lra = be32_to_cpu(ti.last_rec_address); + set_bit(PACKET_LRA_VALID, &pd->flags); + } else { + pd->lra = 0xffffffff; + set_bit(PACKET_LRA_VALID, &pd->flags); + } + + /* + * fine for now + */ + pd->settings.link_loss = 7; + pd->settings.write_type = 0; /* packet */ + pd->settings.track_mode = ti.track_mode; + + /* + * mode1 or mode2 disc + */ + switch (ti.data_mode) { + case PACKET_MODE1: + pd->settings.block_mode = PACKET_BLOCK_MODE1; + break; + case PACKET_MODE2: + pd->settings.block_mode = PACKET_BLOCK_MODE2; + break; + default: + printk("pktcdvd: unknown data mode\n"); + return 1; + } + return 0; +} + +/* + * enable/disable write caching on drive + */ +static int pkt_write_caching(struct pktcdvd_device *pd, int set) +{ + struct packet_command cgc; + struct request_sense sense; + unsigned char buf[64]; + int ret; + + memset(buf, 0, sizeof(buf)); + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.sense = &sense; + cgc.buflen = pd->mode_offset + 12; + + /* + * caching mode page might not be there, so quiet this command + */ + cgc.quiet = 1; + + if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0))) + return ret; + + buf[pd->mode_offset + 10] |= (!!set << 2); + + cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); + ret = pkt_mode_select(pd, &cgc); + if (ret) { + printk("pktcdvd: write caching control failed\n"); + pkt_dump_sense(&cgc); + } else if (!ret && set) + printk("pktcdvd: enabled write caching on %s\n", pd->name); + return ret; +} + +static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; + cgc.cmd[4] = lockflag ? 1 : 0; + return pkt_generic_packet(pd, &cgc); +} + +/* + * Returns drive maximum write speed + */ +static int pkt_get_max_speed(struct pktcdvd_device *pd, unsigned *write_speed) +{ + struct packet_command cgc; + struct request_sense sense; + unsigned char buf[256+18]; + unsigned char *cap_buf; + int ret, offset; + + memset(buf, 0, sizeof(buf)); + cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); + cgc.sense = &sense; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + cgc.buflen = pd->mode_offset + cap_buf[1] + 2 + + sizeof(struct mode_page_header); + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + pkt_dump_sense(&cgc); + return ret; + } + } + + offset = 20; /* Obsoleted field, used by older drives */ + if (cap_buf[1] >= 28) + offset = 28; /* Current write speed selected */ + if (cap_buf[1] >= 30) { + /* If the drive reports at least one "Logical Unit Write + * Speed Performance Descriptor Block", use the information + * in the first block. 
(contains the highest speed)
+		 */
+		int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+		if (num_spdb > 0)
+			offset = 34;
+	}
+
+	*write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+	return 0;
+}
+
+/* These tables from cdrecord - I don't have orange book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+	/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+	0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (-10x) */
+static char hs_clv_to_speed[16] = {
+	/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+	0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+	/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+	0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed)
+{
+	struct packet_command cgc;
+	struct request_sense sense;
+	unsigned char buf[64];
+	unsigned int size, st, sp;
+	int ret;
+
+	init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+	cgc.sense = &sense;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4; /* READ ATIP */
+	cgc.cmd[8] = 2;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(&cgc);
+		return ret;
+	}
+	size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+
+	init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+	cgc.sense = &sense;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4;
+	cgc.cmd[8] = size;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(&cgc);
+		return ret;
+	}
+
+	if (!(buf[6] & 0x40)) {
+		printk("pktcdvd: Disc type is not CD-RW\n");
+		return 1;
+	}
+	if (!(buf[6] & 0x4)) {
+		printk("pktcdvd: A1 values on media are not valid, maybe not CDRW?\n");
+		return 1;
+	}
+
+	st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+	sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+	/* Info from cdrecord */
+	switch (st) {
+	case 0: /* standard speed */
+		*speed = clv_to_speed[sp];
+		break;
+	case 1: /* high speed */
+		*speed = hs_clv_to_speed[sp];
+		break;
+	case 2: /* ultra high speed */
+		*speed = us_clv_to_speed[sp];
+		break;
+	default:
+		printk("pktcdvd: Unknown disc sub-type %d\n",st);
+		return 1;
+	}
+	if (*speed) {
+		printk("pktcdvd: Max.
media speed: %d\n",*speed); + return 0; + } else { + printk("pktcdvd: Unknown speed %d for sub-type %d\n",sp,st); + return 1; + } +} + +static int pkt_perform_opc(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct request_sense sense; + int ret; + + VPRINTK("pktcdvd: Performing OPC\n"); + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sense = &sense; + cgc.timeout = 60*HZ; + cgc.cmd[0] = GPCMD_SEND_OPC; + cgc.cmd[1] = 1; + if ((ret = pkt_generic_packet(pd, &cgc))) + pkt_dump_sense(&cgc); + return ret; +} + +static int pkt_open_write(struct pktcdvd_device *pd) +{ + int ret; + unsigned int write_speed, media_write_speed, read_speed; + + if ((ret = pkt_probe_settings(pd))) { + DPRINTK("pktcdvd: %s failed probe\n", pd->name); + return -EIO; + } + + if ((ret = pkt_set_write_settings(pd))) { + DPRINTK("pktcdvd: %s failed saving write settings\n", pd->name); + return -EIO; + } + + pkt_write_caching(pd, USE_WCACHING); + + if ((ret = pkt_get_max_speed(pd, &write_speed))) + write_speed = 16 * 177; + switch (pd->mmc3_profile) { + case 0x13: /* DVD-RW */ + case 0x1a: /* DVD+RW */ + DPRINTK("pktcdvd: write speed %ukB/s\n", write_speed); + break; + default: + if ((ret = pkt_media_speed(pd, &media_write_speed))) + media_write_speed = 16; + write_speed = min(write_speed, media_write_speed * 177); + DPRINTK("pktcdvd: write speed %ux\n", write_speed / 176); + break; + } + read_speed = write_speed; + + if ((ret = pkt_set_speed(pd, write_speed, read_speed))) { + DPRINTK("pktcdvd: %s couldn't set write speed\n", pd->name); + return -EIO; + } + pd->write_speed = write_speed; + pd->read_speed = read_speed; + + if ((ret = pkt_perform_opc(pd))) { + DPRINTK("pktcdvd: %s Optimum Power Calibration failed\n", pd->name); + } + + return 0; +} + +/* + * called at open time. + */ +static int pkt_open_dev(struct pktcdvd_device *pd, int write) +{ + int ret; + long lba; + request_queue_t *q; + + /* + * We need to re-open the cdrom device without O_NONBLOCK to be able + * to read/write from/to it. It is already opened in O_NONBLOCK mode + * so bdget() can't fail. + */ + bdget(pd->bdev->bd_dev); + if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY))) + goto out; + + if ((ret = pkt_get_last_written(pd, &lba))) { + printk("pktcdvd: pkt_get_last_written failed\n"); + goto out_putdev; + } + + set_capacity(pd->disk, lba << 2); + set_capacity(pd->bdev->bd_disk, lba << 2); + bd_set_size(pd->bdev, (loff_t)lba << 11); + + q = bdev_get_queue(pd->bdev); + if (write) { + if ((ret = pkt_open_write(pd))) + goto out_putdev; + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + spin_lock_irq(q->queue_lock); + blk_queue_max_sectors(q, pd->settings.size); + spin_unlock_irq(q->queue_lock); + set_bit(PACKET_WRITABLE, &pd->flags); + } else { + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + clear_bit(PACKET_WRITABLE, &pd->flags); + } + + if ((ret = pkt_set_segment_merging(pd, q))) + goto out_putdev; + + if (write) + printk("pktcdvd: %lukB available on disc\n", lba << 1); + + return 0; + +out_putdev: + blkdev_put(pd->bdev); +out: + return ret; +} + +/* + * called when the device is closed. makes sure that the device flushes + * the internal cache before we close. 
+ */ +static void pkt_release_dev(struct pktcdvd_device *pd, int flush) +{ + if (flush && pkt_flush_cache(pd)) + DPRINTK("pktcdvd: %s not flushing cache\n", pd->name); + + pkt_lock_door(pd, 0); + + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + blkdev_put(pd->bdev); +} + +static struct pktcdvd_device *pkt_find_dev_from_minor(int dev_minor) +{ + if (dev_minor >= MAX_WRITERS) + return NULL; + return pkt_devs[dev_minor]; +} + +static int pkt_open(struct inode *inode, struct file *file) +{ + struct pktcdvd_device *pd = NULL; + int ret; + + VPRINTK("pktcdvd: entering open\n"); + + down(&ctl_mutex); + pd = pkt_find_dev_from_minor(iminor(inode)); + if (!pd) { + ret = -ENODEV; + goto out; + } + BUG_ON(pd->refcnt < 0); + + pd->refcnt++; + if (pd->refcnt == 1) { + if (pkt_open_dev(pd, file->f_mode & FMODE_WRITE)) { + ret = -EIO; + goto out_dec; + } + /* + * needed here as well, since ext2 (among others) may change + * the blocksize at mount time + */ + set_blocksize(inode->i_bdev, CD_FRAMESIZE); + } + + up(&ctl_mutex); + return 0; + +out_dec: + pd->refcnt--; +out: + VPRINTK("pktcdvd: failed open (%d)\n", ret); + up(&ctl_mutex); + return ret; +} + +static int pkt_close(struct inode *inode, struct file *file) +{ + struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data; + int ret = 0; + + down(&ctl_mutex); + pd->refcnt--; + BUG_ON(pd->refcnt < 0); + if (pd->refcnt == 0) { + int flush = test_bit(PACKET_WRITABLE, &pd->flags); + pkt_release_dev(pd, flush); + } + up(&ctl_mutex); + return ret; +} + + +static void *psd_pool_alloc(int gfp_mask, void *data) +{ + return kmalloc(sizeof(struct packet_stacked_data), gfp_mask); +} + +static void psd_pool_free(void *ptr, void *data) +{ + kfree(ptr); +} + +static int pkt_end_io_read_cloned(struct bio *bio, unsigned int bytes_done, int err) +{ + struct packet_stacked_data *psd = bio->bi_private; + struct pktcdvd_device *pd = psd->pd; + + if (bio->bi_size) + return 1; + + bio_put(bio); + bio_endio(psd->bio, psd->bio->bi_size, err); + mempool_free(psd, psd_pool); + pkt_bio_finished(pd); + return 0; +} + +static int pkt_make_request(request_queue_t *q, struct bio *bio) +{ + struct pktcdvd_device *pd; + char b[BDEVNAME_SIZE]; + sector_t zone; + struct packet_data *pkt; + int was_empty, blocked_bio; + struct pkt_rb_node *node; + + pd = q->queuedata; + if (!pd) { + printk("pktcdvd: %s incorrect request queue\n", bdevname(bio->bi_bdev, b)); + goto end_io; + } + + /* + * Clone READ bios so we can have our own bi_end_io callback. 
+ */ + if (bio_data_dir(bio) == READ) { + struct bio *cloned_bio = bio_clone(bio, GFP_NOIO); + struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); + + psd->pd = pd; + psd->bio = bio; + cloned_bio->bi_bdev = pd->bdev; + cloned_bio->bi_private = psd; + cloned_bio->bi_end_io = pkt_end_io_read_cloned; + pd->stats.secs_r += bio->bi_size >> 9; + pkt_queue_bio(pd, cloned_bio, 1); + return 0; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { + printk("pktcdvd: WRITE for ro device %s (%llu)\n", + pd->name, (unsigned long long)bio->bi_sector); + goto end_io; + } + + if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) { + printk("pktcdvd: wrong bio size\n"); + goto end_io; + } + + blk_queue_bounce(q, &bio); + + zone = ZONE(bio->bi_sector, pd); + VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", + (unsigned long long)bio->bi_sector, + (unsigned long long)(bio->bi_sector + bio_sectors(bio))); + + /* Check if we have to split the bio */ + { + struct bio_pair *bp; + sector_t last_zone; + int first_sectors; + + last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd); + if (last_zone != zone) { + BUG_ON(last_zone != zone + pd->settings.size); + first_sectors = last_zone - bio->bi_sector; + bp = bio_split(bio, bio_split_pool, first_sectors); + BUG_ON(!bp); + pkt_make_request(q, &bp->bio1); + pkt_make_request(q, &bp->bio2); + bio_pair_release(bp); + return 0; + } + } + + /* + * If we find a matching packet in state WAITING or READ_WAIT, we can + * just append this bio to that packet. + */ + spin_lock(&pd->cdrw.active_list_lock); + blocked_bio = 0; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sector == zone) { + spin_lock(&pkt->lock); + if ((pkt->state == PACKET_WAITING_STATE) || + (pkt->state == PACKET_READ_WAIT_STATE)) { + pkt_add_list_last(bio, &pkt->orig_bios, + &pkt->orig_bios_tail); + pkt->write_size += bio->bi_size / CD_FRAMESIZE; + if ((pkt->write_size >= pkt->frames) && + (pkt->state == PACKET_WAITING_STATE)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + spin_unlock(&pkt->lock); + spin_unlock(&pd->cdrw.active_list_lock); + return 0; + } else { + blocked_bio = 1; + } + spin_unlock(&pkt->lock); + } + } + spin_unlock(&pd->cdrw.active_list_lock); + + /* + * No matching packet found. Store the bio in the work queue. + */ + node = mempool_alloc(pd->rb_pool, GFP_NOIO); + BUG_ON(!node); + node->bio = bio; + spin_lock(&pd->lock); + BUG_ON(pd->bio_queue_size < 0); + was_empty = (pd->bio_queue_size == 0); + pkt_rbtree_insert(pd, node); + spin_unlock(&pd->lock); + + /* + * Wake up the worker thread. + */ + atomic_set(&pd->scan_queue, 1); + if (was_empty) { + /* This wake_up is required for correct operation */ + wake_up(&pd->wqueue); + } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) { + /* + * This wake up is not required for correct operation, + * but improves performance in some cases. + */ + wake_up(&pd->wqueue); + } + return 0; +end_io: + bio_io_error(bio, bio->bi_size); + return 0; +} + + + +static int pkt_merge_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *bvec) +{ + struct pktcdvd_device *pd = q->queuedata; + sector_t zone = ZONE(bio->bi_sector, pd); + int used = ((bio->bi_sector - zone) << 9) + bio->bi_size; + int remaining = (pd->settings.size << 9) - used; + int remaining2; + + /* + * A bio <= PAGE_SIZE must be allowed. If it crosses a packet + * boundary, pkt_make_request() will split the bio. 
+ */ + remaining2 = PAGE_SIZE - bio->bi_size; + remaining = max(remaining, remaining2); + + BUG_ON(remaining < 0); + return remaining; +} + +static void pkt_init_queue(struct pktcdvd_device *pd) +{ + request_queue_t *q = pd->disk->queue; + + blk_queue_make_request(q, pkt_make_request); + blk_queue_hardsect_size(q, CD_FRAMESIZE); + blk_queue_max_sectors(q, PACKET_MAX_SECTORS); + blk_queue_merge_bvec(q, pkt_merge_bvec); + q->queuedata = pd; +} + +static int pkt_seq_show(struct seq_file *m, void *p) +{ + struct pktcdvd_device *pd = m->private; + char *msg; + char bdev_buf[BDEVNAME_SIZE]; + int states[PACKET_NUM_STATES]; + + seq_printf(m, "Writer %s mapped to %s:\n", pd->name, + bdevname(pd->bdev, bdev_buf)); + + seq_printf(m, "\nSettings:\n"); + seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); + + if (pd->settings.write_type == 0) + msg = "Packet"; + else + msg = "Unknown"; + seq_printf(m, "\twrite type:\t\t%s\n", msg); + + seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable"); + seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss); + + seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode); + + if (pd->settings.block_mode == PACKET_BLOCK_MODE1) + msg = "Mode 1"; + else if (pd->settings.block_mode == PACKET_BLOCK_MODE2) + msg = "Mode 2"; + else + msg = "Unknown"; + seq_printf(m, "\tblock mode:\t\t%s\n", msg); + + seq_printf(m, "\nStatistics:\n"); + seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started); + seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended); + seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1); + seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1); + seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1); + + seq_printf(m, "\nMisc:\n"); + seq_printf(m, "\treference count:\t%d\n", pd->refcnt); + seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags); + seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed); + seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed); + seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset); + seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset); + + seq_printf(m, "\nQueue state:\n"); + seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size); + seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios)); + seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector); + + pkt_count_states(pd, states); + seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], states[3], states[4], states[5]); + + return 0; +} + +static int pkt_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, pkt_seq_show, PDE(inode)->data); +} + +static struct file_operations pkt_proc_fops = { + .open = pkt_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) +{ + int i; + int ret = 0; + char b[BDEVNAME_SIZE]; + struct proc_dir_entry *proc; + struct block_device *bdev; + + if (pd->pkt_dev == dev) { + printk("pktcdvd: Recursive setup not allowed\n"); + return -EBUSY; + } + for (i = 0; i < MAX_WRITERS; i++) { + struct pktcdvd_device *pd2 = pkt_devs[i]; + if (!pd2) + continue; + if (pd2->bdev->bd_dev == dev) { + printk("pktcdvd: %s already setup\n", bdevname(pd2->bdev, b)); + return -EBUSY; + } + if (pd2->pkt_dev == dev) { + printk("pktcdvd: Can't chain pktcdvd devices\n"); + return -EBUSY; + } + } + + bdev = bdget(dev); + if (!bdev) + return 
-ENOMEM; + ret = blkdev_get(bdev, FMODE_READ, O_RDONLY | O_NONBLOCK); + if (ret) + return ret; + + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); + + if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { + printk("pktcdvd: not enough memory for buffers\n"); + ret = -ENOMEM; + goto out_mem; + } + + pd->bdev = bdev; + set_blocksize(bdev, CD_FRAMESIZE); + + pkt_init_queue(pd); + + atomic_set(&pd->cdrw.pending_bios, 0); + pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); + if (IS_ERR(pd->cdrw.thread)) { + printk("pktcdvd: can't start kernel thread\n"); + ret = -ENOMEM; + goto out_thread; + } + + proc = create_proc_entry(pd->name, 0, pkt_proc); + if (proc) { + proc->data = pd; + proc->proc_fops = &pkt_proc_fops; + } + DPRINTK("pktcdvd: writer %s mapped to %s\n", pd->name, bdevname(bdev, b)); + return 0; + +out_thread: + pkt_shrink_pktlist(pd); +out_mem: + blkdev_put(bdev); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return ret; +} + +static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data; + + VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode)); + BUG_ON(!pd); + + switch (cmd) { + /* + * forward selected CDROM ioctls to CD-ROM, for UDF + */ + case CDROMMULTISESSION: + case CDROMREADTOCENTRY: + case CDROM_LAST_WRITTEN: + case CDROM_SEND_PACKET: + case SCSI_IOCTL_SEND_COMMAND: + return ioctl_by_bdev(pd->bdev, cmd, arg); + + case CDROMEJECT: + /* + * The door gets locked when the device is opened, so we + * have to unlock it or else the eject command fails. + */ + pkt_lock_door(pd, 0); + return ioctl_by_bdev(pd->bdev, cmd, arg); + + default: + printk("pktcdvd: Unknown ioctl for %s (%x)\n", pd->name, cmd); + return -ENOTTY; + } + + return 0; +} + +static int pkt_media_changed(struct gendisk *disk) +{ + struct pktcdvd_device *pd = disk->private_data; + struct gendisk *attached_disk; + + if (!pd) + return 0; + if (!pd->bdev) + return 0; + attached_disk = pd->bdev->bd_disk; + if (!attached_disk) + return 0; + return attached_disk->fops->media_changed(attached_disk); +} + +static struct block_device_operations pktcdvd_ops = { + .owner = THIS_MODULE, + .open = pkt_open, + .release = pkt_close, + .ioctl = pkt_ioctl, + .media_changed = pkt_media_changed, +}; + +/* + * Set up mapping from pktcdvd device to CD-ROM device. 
+ */ +static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd) +{ + int idx; + int ret = -ENOMEM; + struct pktcdvd_device *pd; + struct gendisk *disk; + dev_t dev = new_decode_dev(ctrl_cmd->dev); + + for (idx = 0; idx < MAX_WRITERS; idx++) + if (!pkt_devs[idx]) + break; + if (idx == MAX_WRITERS) { + printk("pktcdvd: max %d writers supported\n", MAX_WRITERS); + return -EBUSY; + } + + pd = kmalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); + if (!pd) + return ret; + memset(pd, 0, sizeof(struct pktcdvd_device)); + + pd->rb_pool = mempool_create(PKT_RB_POOL_SIZE, pkt_rb_alloc, pkt_rb_free, NULL); + if (!pd->rb_pool) + goto out_mem; + + disk = alloc_disk(1); + if (!disk) + goto out_mem; + pd->disk = disk; + + spin_lock_init(&pd->lock); + spin_lock_init(&pd->iosched.lock); + sprintf(pd->name, "pktcdvd%d", idx); + init_waitqueue_head(&pd->wqueue); + pd->bio_queue = RB_ROOT; + + disk->major = pkt_major; + disk->first_minor = idx; + disk->fops = &pktcdvd_ops; + disk->flags = GENHD_FL_REMOVABLE; + sprintf(disk->disk_name, "pktcdvd%d", idx); + disk->private_data = pd; + disk->queue = blk_alloc_queue(GFP_KERNEL); + if (!disk->queue) + goto out_mem2; + + pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + ret = pkt_new_dev(pd, dev); + if (ret) + goto out_new_dev; + + add_disk(disk); + pkt_devs[idx] = pd; + ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); + return 0; + +out_new_dev: + blk_put_queue(disk->queue); +out_mem2: + put_disk(disk); +out_mem: + if (pd->rb_pool) + mempool_destroy(pd->rb_pool); + kfree(pd); + return ret; +} + +/* + * Tear down mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_remove_dev(struct pkt_ctrl_command *ctrl_cmd) +{ + struct pktcdvd_device *pd; + int idx; + dev_t pkt_dev = new_decode_dev(ctrl_cmd->pkt_dev); + + for (idx = 0; idx < MAX_WRITERS; idx++) { + pd = pkt_devs[idx]; + if (pd && (pd->pkt_dev == pkt_dev)) + break; + } + if (idx == MAX_WRITERS) { + DPRINTK("pktcdvd: dev not setup\n"); + return -ENXIO; + } + + if (pd->refcnt > 0) + return -EBUSY; + + if (!IS_ERR(pd->cdrw.thread)) + kthread_stop(pd->cdrw.thread); + + blkdev_put(pd->bdev); + + pkt_shrink_pktlist(pd); + + remove_proc_entry(pd->name, pkt_proc); + DPRINTK("pktcdvd: writer %s unmapped\n", pd->name); + + del_gendisk(pd->disk); + blk_put_queue(pd->disk->queue); + put_disk(pd->disk); + + pkt_devs[idx] = NULL; + mempool_destroy(pd->rb_pool); + kfree(pd); + + /* This is safe: open() is still holding a reference. 
*/ + module_put(THIS_MODULE); + return 0; +} + +static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) +{ + struct pktcdvd_device *pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); + if (pd) { + ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev); + ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); + } else { + ctrl_cmd->dev = 0; + ctrl_cmd->pkt_dev = 0; + } + ctrl_cmd->num_devices = MAX_WRITERS; +} + +static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct pkt_ctrl_command ctrl_cmd; + int ret = 0; + + if (cmd != PACKET_CTRL_CMD) + return -ENOTTY; + + if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + + switch (ctrl_cmd.command) { + case PKT_CTRL_CMD_SETUP: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + down(&ctl_mutex); + ret = pkt_setup_dev(&ctrl_cmd); + up(&ctl_mutex); + break; + case PKT_CTRL_CMD_TEARDOWN: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + down(&ctl_mutex); + ret = pkt_remove_dev(&ctrl_cmd); + up(&ctl_mutex); + break; + case PKT_CTRL_CMD_STATUS: + down(&ctl_mutex); + pkt_get_status(&ctrl_cmd); + up(&ctl_mutex); + break; + default: + return -ENOTTY; + } + + if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + return ret; +} + + +static struct file_operations pkt_ctl_fops = { + .ioctl = pkt_ctl_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice pkt_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "pktcdvd", + .devfs_name = "pktcdvd/control", + .fops = &pkt_ctl_fops +}; + +int pkt_init(void) +{ + int ret; + + psd_pool = mempool_create(PSD_POOL_SIZE, psd_pool_alloc, psd_pool_free, NULL); + if (!psd_pool) + return -ENOMEM; + + ret = register_blkdev(pkt_major, "pktcdvd"); + if (ret < 0) { + printk("pktcdvd: Unable to register block device\n"); + goto out2; + } + if (!pkt_major) + pkt_major = ret; + + ret = misc_register(&pkt_misc); + if (ret) { + printk("pktcdvd: Unable to register misc device\n"); + goto out; + } + + init_MUTEX(&ctl_mutex); + + pkt_proc = proc_mkdir("pktcdvd", proc_root_driver); + + DPRINTK("pktcdvd: %s\n", VERSION_CODE); + return 0; + +out: + unregister_blkdev(pkt_major, "pktcdvd"); +out2: + mempool_destroy(psd_pool); + return ret; +} + +void pkt_exit(void) +{ + remove_proc_entry("pktcdvd", proc_root_driver); + misc_deregister(&pkt_misc); + unregister_blkdev(pkt_major, "pktcdvd"); + mempool_destroy(psd_pool); +} + +MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); +MODULE_AUTHOR("Jens Axboe <axboe@suse.de>"); +MODULE_LICENSE("GPL"); + +module_init(pkt_init); +module_exit(pkt_exit); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index f605535d3f56..dd3be0a06219 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -25,6 +25,7 @@ * -- prune comments, they are too volumnous * -- Exterminate P3 printks * -- Resove XXX's + * -- Redo "benh's retries", perhaps have spin-up code to handle them. V:D=? 
 */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -62,9 +63,9 @@
 
 /* command block wrapper */
 struct bulk_cb_wrap {
-	u32	Signature;		/* contains 'USBC' */
+	__le32	Signature;		/* contains 'USBC' */
 	u32	Tag;			/* unique per command id */
-	u32	DataTransferLength;	/* size of data */
+	__le32	DataTransferLength;	/* size of data */
 	u8	Flags;			/* direction in bit 0 */
 	u8	Lun;			/* LUN normally 0 */
 	u8	Length;			/* of of the CDB */
@@ -78,9 +79,9 @@ struct bulk_cb_wrap {
 
 /* command status wrapper */
 struct bulk_cs_wrap {
-	u32	Signature;		/* should = 'USBS' */
+	__le32	Signature;		/* should = 'USBS' */
 	u32	Tag;			/* same as original command */
-	u32	Residue;		/* amount not transferred */
+	__le32	Residue;		/* amount not transferred */
 	u8	Status;			/* see below */
 };
 
@@ -157,7 +158,8 @@ struct ub_scsi_cmd {
 	struct ub_scsi_cmd *next;
 
 	int error;			/* Return code - valid upon done */
-	int act_len;			/* Return size */
+	unsigned int act_len;		/* Return size */
+	unsigned char key, asc, ascq;	/* May be valid if error==-EIO */
 
 	int stat_count;			/* Retries getting status. */
@@ -490,6 +492,18 @@ static void ub_id_put(int id)
  */
 static void ub_cleanup(struct ub_dev *sc)
 {
+
+	/*
+	 * If we zero disk->private_data BEFORE put_disk, we have to check
+	 * for NULL all over the place in open, release, check_media and
+	 * revalidate, because the block level semaphore is well inside the
+	 * put_disk. But we cannot zero after the call, because *disk is gone.
+	 * The sd.c is blatantly racy in this area.
+	 */
+	/* disk->private_data = NULL; */
+	put_disk(sc->disk);
+	sc->disk = NULL;
+
 	ub_id_put(sc->id);
 	kfree(sc);
 }
@@ -661,9 +675,12 @@ static inline int ub_bd_rq_fn_1(request_queue_t *q)
 
 	/*
 	 * build the command
+	 *
+	 * The call to blk_queue_hardsect_size() guarantees that request
+	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
-	block = rq->sector;
-	nblks = rq->nr_sectors;
+	block = rq->sector >> sc->capacity.bshift;
+	nblks = rq->nr_sectors >> sc->capacity.bshift;
 
 	memset(cmd, 0, sizeof(struct ub_scsi_cmd));
 	cmd->cdb[0] = (ub_dir == UB_DIR_READ)? READ_10: WRITE_10;
@@ -678,7 +695,7 @@ static inline int ub_bd_rq_fn_1(request_queue_t *q)
 	cmd->dir = ub_dir;
 	cmd->state = UB_CMDST_INIT;
 	cmd->data = rq->buffer;
-	cmd->len = nblks * 512;
+	cmd->len = rq->nr_sectors * 512;
 	cmd->done = ub_rw_cmd_done;
 	cmd->back = rq;
@@ -786,17 +803,16 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
-	sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
-	add_timer(&sc->work_timer);
-
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
 		/* XXX Clear stalls */
 		printk("ub: cmd #%d start failed (%d)\n", cmd->tag, rc); /* P3 */
-		del_timer(&sc->work_timer);
 		ub_complete(&sc->work_done);
 		return rc;
 	}
 
+	sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+	add_timer(&sc->work_timer);
+
 	cmd->state = UB_CMDST_CMD;
 	ub_cmdtr_state(sc, cmd);
 	return 0;
@@ -836,6 +852,7 @@ static void ub_scsi_action(unsigned long _dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sc->lock, flags);
+	del_timer(&sc->work_timer);
 	ub_scsi_dispatch(sc);
 	spin_unlock_irqrestore(&sc->lock, flags);
 }
@@ -968,18 +985,17 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 		sc->work_urb.error_count = 0;
 		sc->work_urb.status = 0;
 
-		sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
-		add_timer(&sc->work_timer);
-
 		if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
 			/* XXX Clear stalls */
 			printk("ub: data #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */
-			del_timer(&sc->work_timer);
 			ub_complete(&sc->work_done);
 			ub_state_done(sc, cmd, rc);
 			return;
 		}
 
+		sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+		add_timer(&sc->work_timer);
+
 		cmd->state = UB_CMDST_DATA;
 		ub_cmdtr_state(sc, cmd);
@@ -1063,19 +1079,18 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 		sc->work_urb.error_count = 0;
 		sc->work_urb.status = 0;
 
-		sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
-		add_timer(&sc->work_timer);
-
 		rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC);
 		if (rc != 0) {
 			/* XXX Clear stalls */
 			printk("%s: CSW #%d submit failed (%d)\n",
			    sc->name, cmd->tag, rc); /* P3 */
-			del_timer(&sc->work_timer);
 			ub_complete(&sc->work_done);
 			ub_state_done(sc, cmd, rc);
 			return;
 		}
+
+		sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+		add_timer(&sc->work_timer);
 		return;
 	}
@@ -1132,16 +1147,8 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 		(*cmd->done)(sc, cmd);
 
 	} else if (cmd->state == UB_CMDST_SENSE) {
-		/*
-		 * We do not look at sense, because even if there was no sense,
-		 * we get into UB_CMDST_SENSE from a STALL or CSW FAIL only.
-		 * We request sense because we want to clear CHECK CONDITION
-		 * on devices with delusions of SCSI, and not because we
-		 * are curious in any way about the sense itself.
-		 */
-		/* if ((cmd->top_sense[2] & 0x0F) == NO_SENSE) { foo } */
-
 		ub_state_done(sc, cmd, -EIO);
+
 	} else {
 		printk(KERN_WARNING "%s: "
 		    "wrong command state %d on device %u\n",
@@ -1186,18 +1193,17 @@ static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
-	sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
-	add_timer(&sc->work_timer);
-
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
 		/* XXX Clear stalls */
 		printk("ub: CSW #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */
-		del_timer(&sc->work_timer);
 		ub_complete(&sc->work_done);
 		ub_state_done(sc, cmd, rc);
 		return;
 	}
 
+	sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+	add_timer(&sc->work_timer);
+
 	cmd->stat_count = 0;
 	cmd->state = UB_CMDST_STAT;
 	ub_cmdtr_state(sc, cmd);
@@ -1217,9 +1223,17 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 		goto error;
 	}
 
+	/*
+	 * ``If the allocation length is eighteen or greater, and a device
+	 * server returns less than eithteen bytes of data, the application
+	 * client should assume that the bytes not transferred would have been
+	 * zeroes had the device server returned those bytes.''
+	 */
 	memset(&sc->top_sense, 0, UB_SENSE_SIZE);
+
 	scmd = &sc->top_rqs_cmd;
 	scmd->cdb[0] = REQUEST_SENSE;
+	scmd->cdb[4] = UB_SENSE_SIZE;
 	scmd->cdb_len = 6;
 	scmd->dir = UB_DIR_READ;
 	scmd->state = UB_CMDST_INIT;
@@ -1271,14 +1285,13 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
-	sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT;
-	add_timer(&sc->work_timer);
-
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
-		del_timer(&sc->work_timer);
 		ub_complete(&sc->work_done);
 		return rc;
 	}
+
+	sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT;
+	add_timer(&sc->work_timer);
 	return 0;
 }
@@ -1289,8 +1302,15 @@
 	unsigned char *sense = scmd->data;
 	struct ub_scsi_cmd *cmd;
 
+	/*
+	 * Ignoring scmd->act_len, because the buffer was pre-zeroed.
+	 */
 	ub_cmdtr_sense(sc, scmd, sense);
 
+	/*
+	 * Find the command which triggered the unit attention or a check,
+	 * save the sense into it, and advance its state machine.
+	 */
 	if ((cmd = ub_cmdq_peek(sc)) == NULL) {
 		printk(KERN_WARNING "%s: sense done while idle\n", sc->name);
 		return;
 	}
@@ -1308,6 +1328,10 @@
 		return;
 	}
 
+	cmd->key = sense[2] & 0x0F;
+	cmd->asc = sense[12];
+	cmd->ascq = sense[13];
+
 	ub_scsi_urb_compl(sc, cmd);
 }
@@ -1407,7 +1431,15 @@ static int ub_bd_open(struct inode *inode, struct file *filp)
 	if (sc->removable || sc->readonly)
 		check_disk_change(inode->i_bdev);
 
-	/* XXX sd.c and floppy.c bail on open if media is not present. */
+	/*
+	 * The sd.c considers ->media_present and ->changed not equivalent,
+	 * under some pretty murky conditions (a failure of READ CAPACITY).
+	 * We may need it one day.
+	 */
+	if (sc->removable && sc->changed && !(filp->f_flags & O_NDELAY)) {
+		rc = -ENOMEDIUM;
+		goto err_open;
+	}
 
 	if (sc->readonly && (filp->f_mode & FMODE_WRITE)) {
 		rc = -EROFS;
@@ -1492,8 +1524,11 @@ static int ub_bd_revalidate(struct gendisk *disk)
 	printk(KERN_INFO "%s: device %u capacity nsec %ld bsize %u\n",
 	    sc->name, sc->dev->devnum, sc->capacity.nsec, sc->capacity.bsize);
 
+	/* XXX Support sector size switching like in sr.c */
+	blk_queue_hardsect_size(disk->queue, sc->capacity.bsize);
 	set_capacity(disk, sc->capacity.nsec);
 	// set_disk_ro(sdkp->disk, sc->readonly);
+
 	return 0;
 }
@@ -1592,6 +1627,9 @@ static int ub_sync_tur(struct ub_dev *sc)
 
 	rc = cmd->error;
 
+	if (rc == -EIO && cmd->key != 0)	/* Retries for benh's key */
+		rc = cmd->key;
+
 err_submit:
 	kfree(cmd);
 err_alloc:
@@ -1654,8 +1692,8 @@
 	}
 
 	/* sd.c special-cases sector size of 0 to mean 512. Needed? Safe? */
-	nsec = be32_to_cpu(*(u32 *)p) + 1;
-	bsize = be32_to_cpu(*(u32 *)(p + 4));
+	nsec = be32_to_cpu(*(__be32 *)p) + 1;
+	bsize = be32_to_cpu(*(__be32 *)(p + 4));
 	switch (bsize) {
 	case 512:	shift = 0;	break;
 	case 1024:	shift = 1;	break;
@@ -1725,28 +1763,22 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe)
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
-	init_timer(&timer);
-	timer.function = ub_probe_timeout;
-	timer.data = (unsigned long) &compl;
-	timer.expires = jiffies + UB_CTRL_TIMEOUT;
-	add_timer(&timer);
-
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) {
 		printk(KERN_WARNING
 		     "%s: Unable to submit a probe clear (%d)\n", sc->name, rc);
-		del_timer_sync(&timer);
 		return rc;
 	}
 
+	init_timer(&timer);
+	timer.function = ub_probe_timeout;
+	timer.data = (unsigned long) &compl;
+	timer.expires = jiffies + UB_CTRL_TIMEOUT;
+	add_timer(&timer);
+
 	wait_for_completion(&compl);
 	del_timer_sync(&timer);
-	/*
-	 * Most of the time, URB was done and dev set to NULL, and so
-	 * the unlink bounces out with ENODEV. We do not call usb_kill_urb
-	 * because we still think about a backport to 2.4.
-	 */
-	usb_unlink_urb(&sc->work_urb);
+	usb_kill_urb(&sc->work_urb);
 
 	/* reset the endpoint toggle */
 	usb_settoggle(sc->dev, endp, usb_pipeout(sc->last_pipe), 0);
@@ -1813,6 +1845,7 @@ static int ub_probe(struct usb_interface *intf,
 	request_queue_t *q;
 	struct gendisk *disk;
 	int rc;
+	int i;
 
 	rc = -ENOMEM;
 	if ((sc = kmalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL)
@@ -1879,7 +1912,11 @@ static int ub_probe(struct usb_interface *intf,
 	 * has to succeed, so we clear checks with an additional one here.
	 * In any case it's not our business how revaliadation is implemented.
 	 */
-	ub_sync_tur(sc);
+	for (i = 0; i < 3; i++) {	/* Retries for benh's key */
+		if ((rc = ub_sync_tur(sc)) <= 0) break;
+		if (rc != 0x6) break;
+		msleep(10);
+	}
 
 	sc->removable = 1;		/* XXX Query this from the device */
@@ -1915,7 +1952,7 @@ static int ub_probe(struct usb_interface *intf,
 	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
 	// blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
 	blk_queue_max_sectors(q, UB_MAX_SECTORS);
-	// blk_queue_hardsect_size(q, xxxxx);
+	blk_queue_hardsect_size(q, sc->capacity.bsize);
 
 	/*
 	 * This is a serious infraction, caused by a deficiency in the
@@ -2006,17 +2043,6 @@ static void ub_disconnect(struct usb_interface *intf)
 	blk_cleanup_queue(q);
 
 	/*
-	 * If we zero disk->private_data BEFORE put_disk, we have to check
-	 * for NULL all over the place in open, release, check_media and
-	 * revalidate, because the block level semaphore is well inside the
-	 * put_disk. But we cannot zero after the call, because *disk is gone.
-	 * The sd.c is blatantly racy in this area.
-	 */
-	/* disk->private_data = NULL; */
-	put_disk(disk);
-	sc->disk = NULL;
-
-	/*
 	 * We really expect blk_cleanup_queue() to wait, so no amount
 	 * of paranoya is too much.
 	 *
@@ -2035,6 +2061,13 @@
 	spin_unlock_irqrestore(&sc->lock, flags);
 
 	/*
+	 * There is virtually no chance that other CPU runs times so long
+	 * after ub_urb_complete should have called del_timer, but only if HCD
+	 * didn't forget to deliver a callback on unlink.
+	 */
+	del_timer_sync(&sc->work_timer);
+
+	/*
	 * At this point there must be no commands coming from anyone
	 * and no URBs left in transit.
	 */
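The bulk_cb_wrap/bulk_cs_wrap hunks change only the type annotations: the CBW/CSW signature, data-transfer length and residue fields are little-endian on the wire, and marking them __le32 lets sparse flag any missing cpu_to_le32()/le32_to_cpu() conversion. The following userspace sketch of the same wire format is not the driver's code (put_le32() and build_cbw() are invented names); it only shows why explicit byte order matters: the 31-byte Command Block Wrapper has to come out byte-for-byte identical on little- and big-endian hosts.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store a 32-bit value in little-endian byte order, host-independent. */
static void put_le32(uint8_t *p, uint32_t v)
{
	p[0] = v & 0xff;
	p[1] = (v >> 8) & 0xff;
	p[2] = (v >> 16) & 0xff;
	p[3] = (v >> 24) & 0xff;
}

/* Build a 31-byte bulk-only Command Block Wrapper (USB mass storage BOT). */
static void build_cbw(uint8_t cbw[31], uint32_t tag, uint32_t data_len,
    int dir_in, uint8_t lun, const uint8_t *cdb, uint8_t cdb_len)
{
	if (cdb_len > 16)
		cdb_len = 16;
	memset(cbw, 0, 31);
	put_le32(cbw + 0, 0x43425355);	/* dCBWSignature; bytes "USBC" */
	put_le32(cbw + 4, tag);		/* dCBWTag, echoed back in the CSW */
	put_le32(cbw + 8, data_len);	/* dCBWDataTransferLength */
	cbw[12] = dir_in ? 0x80 : 0x00;	/* bmCBWFlags, bit 7 = device-to-host */
	cbw[13] = lun;			/* bCBWLUN */
	cbw[14] = cdb_len;		/* bCBWCBLength, 1..16 */
	memcpy(cbw + 15, cdb, cdb_len);	/* CBWCB, the SCSI CDB itself */
}

int main(void)
{
	uint8_t cdb[6] = { 0 };		/* TEST UNIT READY, all zeros */
	uint8_t cbw[31];
	int i;

	build_cbw(cbw, 1, 0, 0, 0, cdb, sizeof(cdb));
	for (i = 0; i < 31; i++)
		printf("%02x ", cbw[i]);
	printf("\n");
	return 0;
}

Storing 0x43425355 little-endian yields the bytes 55 53 42 43, i.e. "USBC", which is what the "contains 'USBC'" comment on the Signature field refers to.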
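Several hunks (ub_bd_rq_fn_1, ub_bd_revalidate, ub_probe and the __be32 casts in the READ CAPACITY parsing) deal with devices whose hardware sector size is larger than 512 bytes: the queue is told the real block size via blk_queue_hardsect_size(), but, as the new comment says, rq->sector and rq->nr_sectors stay in 512-byte units, so the driver shifts them right by bshift to get LBAs and block counts for READ(10)/WRITE(10), while cmd->len stays rq->nr_sectors * 512 bytes. Below is a standalone sketch of that arithmetic, assuming the same power-of-two block sizes; only 512 and 1024 are visible in the truncated switch above, the 2048 and 4096 cases are an assumption, and all names here are mine.

#include <stdint.h>
#include <stdio.h>

/* Read a big-endian 32-bit value, as in a READ CAPACITY(10) response. */
static uint32_t get_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

struct capacity {
	unsigned int bsize;	/* device block size in bytes */
	unsigned int bshift;	/* log2(bsize / 512) */
	uint64_t nsec;		/* capacity in 512-byte units */
};

/* Parse the 8-byte READ CAPACITY(10) payload: last LBA, then block size. */
static int parse_read_capacity(const uint8_t resp[8], struct capacity *cap)
{
	uint32_t last_lba = get_be32(resp);
	uint32_t bsize = get_be32(resp + 4);
	unsigned int shift;

	switch (bsize) {		/* power-of-two sizes only */
	case 512:	shift = 0;	break;
	case 1024:	shift = 1;	break;
	case 2048:	shift = 2;	break;
	case 4096:	shift = 3;	break;
	default:	return -1;
	}

	cap->bsize = bsize;
	cap->bshift = shift;
	cap->nsec = ((uint64_t)last_lba + 1) << shift;	/* in 512-byte units */
	return 0;
}

int main(void)
{
	/* Last LBA 0x000fffff, block size 0x800: 1M blocks of 2048 bytes. */
	const uint8_t resp[8] = { 0x00, 0x0f, 0xff, 0xff, 0x00, 0x00, 0x08, 0x00 };
	uint64_t rq_sector = 1000, rq_nr_sectors = 8;	/* 512-byte units */
	struct capacity cap;

	if (parse_read_capacity(resp, &cap) != 0)
		return 1;

	/* The block layer issues requests in 512-byte units; shifting by
	 * bshift converts them to device blocks for the READ(10) CDB. */
	printf("bsize %u, capacity %llu sectors of 512 bytes\n",
	    cap.bsize, (unsigned long long)cap.nsec);
	printf("sector %llu -> LBA %llu, %llu sectors -> %llu blocks\n",
	    (unsigned long long)rq_sector,
	    (unsigned long long)(rq_sector >> cap.bshift),
	    (unsigned long long)rq_nr_sectors,
	    (unsigned long long)(rq_nr_sectors >> cap.bshift));
	return 0;
}

For a 2048-byte-sector device the example maps a request at 512-byte sector 1000 to LBA 250, and 8 sectors to 2 device blocks, which is exactly the rq->sector >> sc->capacity.bshift conversion in ub_bd_rq_fn_1().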
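The repeated change in ub_scsi_cmd_start(), ub_scsi_urb_compl(), ub_state_stat(), ub_submit_clear_stall() and ub_probe_clear_stall() is the same one each time: add_timer() used to run before usb_submit_urb(), so every failed submission also had to del_timer() again; now the watchdog is armed only after the URB has been accepted, the tasklet (ub_scsi_action) does del_timer() under sc->lock before dispatching, and ub_disconnect() gains a del_timer_sync() once no more completions can arrive. The toy model below is not kernel code and uses no kernel API; it only shows why the second shape leaves nothing timer-related to undo on the error path.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the ordering change: "submitting" an operation may fail,
 * and a watchdog, once armed, eventually fires and aborts that operation.
 * Everything here is invented; only the ordering mirrors the driver.
 */

struct op {
	bool in_flight;
	bool timer_armed;
	int timer_calls;	/* how many times the timer API was touched */
};

static int submit(struct op *op, bool simulate_failure)
{
	if (simulate_failure)
		return -1;		/* nothing was started */
	op->in_flight = true;
	return 0;
}

static void arm_watchdog(struct op *op)
{
	op->timer_armed = true;
	op->timer_calls++;
}

static void disarm_watchdog(struct op *op)
{
	op->timer_armed = false;
	op->timer_calls++;
}

/* Old shape: arm first, submit second.  Every failure path has to
 * remember to disarm, and between arm and submit the watchdog covers
 * an operation that does not exist yet. */
static int start_old(struct op *op, bool fail)
{
	arm_watchdog(op);
	if (submit(op, fail) != 0) {
		disarm_watchdog(op);	/* easy to forget */
		return -1;
	}
	return 0;
}

/* New shape, as in the ub.c hunks: submit first, arm only on success.
 * The error path has no timer state to undo, and the watchdog can only
 * ever refer to an operation that is really in flight. */
static int start_new(struct op *op, bool fail)
{
	if (submit(op, fail) != 0)
		return -1;
	arm_watchdog(op);
	return 0;
}

int main(void)
{
	struct op a = { false, false, 0 }, b = { false, false, 0 };

	start_old(&a, true);
	start_new(&b, true);
	printf("old shape, failed submit: %d timer calls\n", a.timer_calls);
	printf("new shape, failed submit: %d timer calls\n", b.timer_calls);
	return 0;
}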
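The sense-handling hunks work together: ub_scsi_cmd gains key/asc/ascq fields, ub_state_sense() now sets the REQUEST SENSE allocation length (CDB byte 4) to UB_SENSE_SIZE and pre-zeroes the buffer, ub_top_sense_done() copies sense[2] & 0x0F, sense[12] and sense[13] into the failing command, ub_sync_tur() returns the key instead of a bare -EIO, and ub_probe() retries TEST UNIT READY up to three times while the key is 0x6 (UNIT ATTENTION, the "benh's key" of the comments). The sketch below is a standalone illustration of the fixed-format sense layout those offsets come from; the names are mine, not the driver's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SENSE_SIZE 18	/* classic fixed-format sense length */

struct sense_info {
	uint8_t key;	/* sense key, e.g. 0x6 = UNIT ATTENTION */
	uint8_t asc;	/* additional sense code */
	uint8_t ascq;	/* additional sense code qualifier */
};

/* Build a 6-byte REQUEST SENSE CDB; byte 4 is the allocation length,
 * the field the hunk sets with scmd->cdb[4] = UB_SENSE_SIZE. */
static void build_request_sense(uint8_t cdb[6], uint8_t alloc_len)
{
	memset(cdb, 0, 6);
	cdb[0] = 0x03;		/* REQUEST SENSE */
	cdb[4] = alloc_len;
}

/* Decode fixed-format sense data: key in byte 2 bits 0..3, ASC in byte 12,
 * ASCQ in byte 13.  The buffer is expected to be zeroed beforehand, so a
 * short transfer still decodes as "no sense". */
static void decode_sense(const uint8_t *sense, struct sense_info *si)
{
	si->key = sense[2] & 0x0F;
	si->asc = sense[12];
	si->ascq = sense[13];
}

int main(void)
{
	uint8_t cdb[6];
	uint8_t sense[SENSE_SIZE];
	struct sense_info si;

	build_request_sense(cdb, SENSE_SIZE);

	/* Pretend the device reported UNIT ATTENTION, medium may have changed. */
	memset(sense, 0, sizeof(sense));
	sense[0] = 0x70;	/* current error, fixed format */
	sense[2] = 0x06;	/* UNIT ATTENTION */
	sense[12] = 0x28;	/* ASC: not ready to ready change */
	decode_sense(sense, &si);

	printf("key %#x asc %#x ascq %#x\n",
	    (unsigned)si.key, (unsigned)si.asc, (unsigned)si.ascq);
	return 0;
}

Pre-zeroing matters because, per the SPC wording quoted in the new ub_state_sense() comment, a device may legally return fewer than eighteen bytes; with a zeroed buffer a short transfer simply decodes as all zeros.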
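One pair of hunks moves put_disk() and the sc->disk = NULL assignment, together with the comment about zeroing disk->private_data, out of ub_disconnect() and into ub_cleanup(). The apparent intent is a lifetime rule: disconnect only severs the device from the bus, while the gendisk reference is dropped when the ub_dev itself is finally cleaned up, so open/release paths never see a half-torn-down disk. The toy refcount model below is an interpretation of that rule, not the driver's mechanism, and uses no kernel API.

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy lifetime model (all names invented): the disk object must stay
 * valid as long as anyone still holds a reference to the device, so it
 * is released in the final cleanup, not in the hot-unplug path.
 */

struct disk { int dummy; };

struct dev {
	int refcnt;
	struct disk *disk;
};

static void dev_cleanup(struct dev *d)
{
	/* Last reference gone: now it is safe to drop the disk too. */
	free(d->disk);
	d->disk = NULL;
	free(d);
	printf("cleanup: device and disk freed\n");
}

static void dev_get(struct dev *d) { d->refcnt++; }

static void dev_put(struct dev *d)
{
	if (--d->refcnt == 0)
		dev_cleanup(d);
}

static void dev_disconnect(struct dev *d)
{
	/* The device is gone from the bus, but open files may still hold
	 * references; do not touch d->disk here. */
	printf("disconnect: refcnt still %d, disk kept\n", d->refcnt);
	dev_put(d);		/* drop only the bus's reference */
}

int main(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	if (d == NULL)
		return 1;
	if ((d->disk = calloc(1, sizeof(*d->disk))) == NULL) {
		free(d);
		return 1;
	}
	d->refcnt = 1;		/* the bus's reference */

	dev_get(d);		/* an opener takes a reference */
	dev_disconnect(d);	/* hot unplug while still open */
	dev_put(d);		/* the opener goes away; cleanup runs now */
	return 0;
}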
