author      Linus Torvalds <torvalds@ppc970.osdl.org>    2004-10-18 20:50:22 -0700
committer   Linus Torvalds <torvalds@ppc970.osdl.org>    2004-10-18 20:50:22 -0700
commit      098fc560ef2bbd1bde80845c898fa95db616eb6c
tree        ca722c6fdbdffe9b7cfd31d61e8f4aae906a319c    /drivers/block
parent      bffe01870598b7a0a77073e25ee94e026bc98e6b
parent      2a136606fe21b603a0ce484fc578f862f8e8384d
Trivial Makefile merge
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig            |   33
-rw-r--r--  drivers/block/Kconfig.iosched    |    8
-rw-r--r--  drivers/block/Makefile           |    1
-rw-r--r--  drivers/block/as-iosched.c       |  122
-rw-r--r--  drivers/block/cfq-iosched.c      | 1574
-rw-r--r--  drivers/block/cpqarray.c         |   14
-rw-r--r--  drivers/block/deadline-iosched.c |  136
-rw-r--r--  drivers/block/elevator.c         |  318
-rw-r--r--  drivers/block/ll_rw_blk.c        |  253
-rw-r--r--  drivers/block/noop-iosched.c     |   33
-rw-r--r--  drivers/block/pktcdvd.c          | 2679
-rw-r--r--  drivers/block/ub.c               |  165
12 files changed, 4735 insertions(+), 601 deletions(-)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a1d50242b8cd..6a43c807497d 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -356,6 +356,39 @@ config LBD
your machine, or if you want to have a raid or loopback device
bigger than 2TB. Otherwise say N.
+config CDROM_PKTCDVD
+ tristate "Packet writing on CD/DVD media"
+ help
+ If you have a CDROM drive that supports packet writing, say Y to
+ include preliminary support. It should work with any MMC/Mt Fuji
+ compliant ATAPI or SCSI drive, which is just about any newer CD
+ writer.
+
+ Currently only writing to CD-RW, DVD-RW and DVD+RW discs is possible.
+ DVD-RW disks must be in restricted overwrite mode.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+ int "Free buffers for data gathering"
+ depends on CDROM_PKTCDVD
+ default "8"
+ help
+ This controls the maximum number of active concurrent packets. More
+ concurrent packets can increase write performance, but also require
+ more memory. Each concurrent packet will require approximately 64Kb
+ of non-swappable kernel memory, memory which will be allocated at
+ pktsetup time.
+
+config CDROM_PKTCDVD_WCACHE
+ bool "Enable write caching"
+ depends on CDROM_PKTCDVD
+ help
+ If enabled, write caching will be set for the CD-R/W device. For now
+ this option is dangerous unless the CD-RW media is known good, as we
+ don't do deferred write error handling yet.
+
source "drivers/s390/block/Kconfig"
endmenu
diff --git a/drivers/block/Kconfig.iosched b/drivers/block/Kconfig.iosched
index d938c5fd130b..e0ba6c93717e 100644
--- a/drivers/block/Kconfig.iosched
+++ b/drivers/block/Kconfig.iosched
@@ -1,5 +1,5 @@
config IOSCHED_NOOP
- bool "No-op I/O scheduler" if EMBEDDED
+ bool
default y
---help---
The no-op I/O scheduler is a minimal scheduler that does basic merging
@@ -9,7 +9,7 @@ config IOSCHED_NOOP
the kernel.
config IOSCHED_AS
- bool "Anticipatory I/O scheduler" if EMBEDDED
+ tristate "Anticipatory I/O scheduler"
default y
---help---
The anticipatory I/O scheduler is the default disk scheduler. It is
@@ -18,7 +18,7 @@ config IOSCHED_AS
slower in some cases especially some database loads.
config IOSCHED_DEADLINE
- bool "Deadline I/O scheduler" if EMBEDDED
+ tristate "Deadline I/O scheduler"
default y
---help---
The deadline I/O scheduler is simple and compact, and is often as
@@ -28,7 +28,7 @@ config IOSCHED_DEADLINE
anticipatory I/O scheduler and so is a good choice.
config IOSCHED_CFQ
- bool "CFQ I/O scheduler" if EMBEDDED
+ tristate "CFQ I/O scheduler"
default y
---help---
The CFQ I/O scheduler tries to distribute bandwidth equally
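
Making the schedulers tristate instead of EMBEDDED-gated bools means each one can now be built as a module that registers itself by name when loaded, the way the as-iosched.c changes below call elv_register()/elv_unregister(). The following stand-alone C sketch only models that register/unregister/find-by-name pattern; the struct and function names are made up for illustration and are not the kernel's elevator core:

#include <stdio.h>
#include <string.h>

/* toy registry modelling elv_register()/elv_unregister(): each scheduler
 * module adds an entry by name on load and removes it on unload */
struct sched_type {
	const char *name;
	struct sched_type *next;
};

static struct sched_type *registry;

static void sched_register(struct sched_type *t)
{
	t->next = registry;
	registry = t;
}

static void sched_unregister(struct sched_type *t)
{
	for (struct sched_type **p = &registry; *p; p = &(*p)->next) {
		if (*p == t) {
			*p = t->next;
			return;
		}
	}
}

static struct sched_type *sched_find(const char *name)
{
	for (struct sched_type *t = registry; t; t = t->next)
		if (!strcmp(t->name, name))
			return t;
	return NULL;
}

int main(void)
{
	struct sched_type as = { .name = "anticipatory" }, cfq = { .name = "cfq" };

	sched_register(&as);
	sched_register(&cfq);
	printf("lookup cfq: %s\n", sched_find("cfq") ? "found" : "missing");
	sched_unregister(&cfq);
	printf("after unregister: %s\n", sched_find("cfq") ? "found" : "missing");
	return 0;
}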
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index c8fbbf14ce94..1cf09a1c065b 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_BLK_DEV_XD) += xd.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
+obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 0ef6a665d93e..0aa3ee8c309b 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -614,7 +614,7 @@ static void as_antic_stop(struct as_data *ad)
static void as_antic_timeout(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
@@ -945,7 +945,7 @@ static void update_write_batch(struct as_data *ad)
*/
static void as_completed_request(request_queue_t *q, struct request *rq)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
WARN_ON(!list_empty(&rq->queuelist));
@@ -1030,7 +1030,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
{
struct as_rq *arq = RQ_DATA(rq);
const int data_dir = arq->is_sync;
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
WARN_ON(arq->state != AS_RQ_QUEUED);
@@ -1361,7 +1361,7 @@ fifo_expired:
static struct request *as_next_request(request_queue_t *q)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct request *rq = NULL;
/*
@@ -1469,7 +1469,7 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
*/
static void as_requeue_request(request_queue_t *q, struct request *rq)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
if (arq) {
@@ -1509,7 +1509,7 @@ static void as_account_queued_request(struct as_data *ad, struct request *rq)
static void
as_insert_request(request_queue_t *q, struct request *rq, int where)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
if (arq) {
@@ -1562,7 +1562,7 @@ as_insert_request(request_queue_t *q, struct request *rq, int where)
*/
static int as_queue_empty(request_queue_t *q)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
if (!list_empty(&ad->fifo_list[REQ_ASYNC])
|| !list_empty(&ad->fifo_list[REQ_SYNC])
@@ -1601,7 +1601,7 @@ as_latter_request(request_queue_t *q, struct request *rq)
static int
as_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
sector_t rb_key = bio->bi_sector + bio_sectors(bio);
struct request *__rq;
int ret;
@@ -1656,7 +1656,7 @@ out_insert:
static void as_merged_request(request_queue_t *q, struct request *req)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(req);
/*
@@ -1701,7 +1701,7 @@ static void
as_merged_requests(request_queue_t *q, struct request *req,
struct request *next)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(req);
struct as_rq *anext = RQ_DATA(next);
@@ -1788,7 +1788,7 @@ static void as_work_handler(void *data)
static void as_put_request(request_queue_t *q, struct request *rq)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
if (!arq) {
@@ -1807,7 +1807,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
{
- struct as_data *ad = q->elevator.elevator_data;
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
if (arq) {
@@ -1828,21 +1828,21 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
static int as_may_queue(request_queue_t *q, int rw)
{
- int ret = 0;
- struct as_data *ad = q->elevator.elevator_data;
+ int ret = ELV_MQUEUE_MAY;
+ struct as_data *ad = q->elevator->elevator_data;
struct io_context *ioc;
if (ad->antic_status == ANTIC_WAIT_REQ ||
ad->antic_status == ANTIC_WAIT_NEXT) {
ioc = as_get_io_context();
if (ad->io_context == ioc)
- ret = 1;
+ ret = ELV_MQUEUE_MUST;
put_io_context(ioc);
}
return ret;
}
-static void as_exit(request_queue_t *q, elevator_t *e)
+static void as_exit_queue(elevator_t *e)
{
struct as_data *ad = e->elevator_data;
@@ -1862,7 +1862,7 @@ static void as_exit(request_queue_t *q, elevator_t *e)
* initialize elevator private data (as_data), and alloc a arq for
* each request on the free lists
*/
-static int as_init(request_queue_t *q, elevator_t *e)
+static int as_init_queue(request_queue_t *q, elevator_t *e)
{
struct as_data *ad;
int i;
@@ -1962,10 +1962,10 @@ static ssize_t as_est_show(struct as_data *ad, char *page)
return pos;
}
-#define SHOW_FUNCTION(__FUNC, __VAR) \
+#define SHOW_FUNCTION(__FUNC, __VAR) \
static ssize_t __FUNC(struct as_data *ad, char *page) \
-{ \
- return as_var_show(__VAR, (page)); \
+{ \
+ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
}
SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]);
SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]);
@@ -1982,6 +1982,7 @@ static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \
*(__PTR) = (MIN); \
else if (*(__PTR) > (MAX)) \
*(__PTR) = (MAX); \
+ *(__PTR) = msecs_to_jiffies(*(__PTR)); \
return ret; \
}
STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
@@ -2070,39 +2071,64 @@ static struct kobj_type as_ktype = {
.default_attrs = default_attrs,
};
-static int __init as_slab_setup(void)
+static struct elevator_type iosched_as = {
+ .ops = {
+ .elevator_merge_fn = as_merge,
+ .elevator_merged_fn = as_merged_request,
+ .elevator_merge_req_fn = as_merged_requests,
+ .elevator_next_req_fn = as_next_request,
+ .elevator_add_req_fn = as_insert_request,
+ .elevator_remove_req_fn = as_remove_request,
+ .elevator_requeue_req_fn = as_requeue_request,
+ .elevator_queue_empty_fn = as_queue_empty,
+ .elevator_completed_req_fn = as_completed_request,
+ .elevator_former_req_fn = as_former_request,
+ .elevator_latter_req_fn = as_latter_request,
+ .elevator_set_req_fn = as_set_request,
+ .elevator_put_req_fn = as_put_request,
+ .elevator_may_queue_fn = as_may_queue,
+ .elevator_init_fn = as_init_queue,
+ .elevator_exit_fn = as_exit_queue,
+ },
+
+ .elevator_ktype = &as_ktype,
+ .elevator_name = "anticipatory",
+ .elevator_owner = THIS_MODULE,
+};
+
+int as_init(void)
{
+ int ret;
+
arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq),
0, 0, NULL, NULL);
-
if (!arq_pool)
- panic("as: can't init slab pool\n");
+ return -ENOMEM;
- return 0;
+ ret = elv_register(&iosched_as);
+ if (!ret) {
+ /*
+ * don't allow AS to get unregistered, since we would have
+ * to browse all tasks in the system and release their
+ * as_io_context first
+ */
+ __module_get(THIS_MODULE);
+ return 0;
+ }
+
+ kmem_cache_destroy(arq_pool);
+ return ret;
}
-subsys_initcall(as_slab_setup);
-
-elevator_t iosched_as = {
- .elevator_merge_fn = as_merge,
- .elevator_merged_fn = as_merged_request,
- .elevator_merge_req_fn = as_merged_requests,
- .elevator_next_req_fn = as_next_request,
- .elevator_add_req_fn = as_insert_request,
- .elevator_remove_req_fn = as_remove_request,
- .elevator_requeue_req_fn = as_requeue_request,
- .elevator_queue_empty_fn = as_queue_empty,
- .elevator_completed_req_fn = as_completed_request,
- .elevator_former_req_fn = as_former_request,
- .elevator_latter_req_fn = as_latter_request,
- .elevator_set_req_fn = as_set_request,
- .elevator_put_req_fn = as_put_request,
- .elevator_may_queue_fn = as_may_queue,
- .elevator_init_fn = as_init,
- .elevator_exit_fn = as_exit,
-
- .elevator_ktype = &as_ktype,
- .elevator_name = "anticipatory",
-};
+void as_exit(void)
+{
+ kmem_cache_destroy(arq_pool);
+ elv_unregister(&iosched_as);
+}
+
+module_init(as_init);
+module_exit(as_exit);
-EXPORT_SYMBOL(iosched_as);
+MODULE_AUTHOR("Nick Piggin");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("anticipatory IO scheduler");
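
The reworked SHOW_FUNCTION/STORE_FUNCTION macros above expose the AS tunables in milliseconds while the scheduler keeps jiffies internally: the store path clamps the user value and then converts it with msecs_to_jiffies(), and the show path converts back with jiffies_to_msecs(). A small user-space model of that round trip, with HZ and the two helpers as stand-ins for the kernel's versions (plain truncating division here, unlike the kernel's rounding):

#include <stdio.h>
#include <stdlib.h>

#define HZ 250			/* stand-in tick rate, not the kernel's config */

static unsigned long msecs_to_jiffies_model(unsigned long ms) { return ms * HZ / 1000; }
static unsigned long jiffies_to_msecs_model(unsigned long j)  { return j * 1000 / HZ; }

/* models the STORE_FUNCTION pattern: parse msecs, clamp, then keep jiffies */
static unsigned long store_tunable(const char *page, unsigned long min_ms, unsigned long max_ms)
{
	unsigned long ms = strtoul(page, NULL, 10);

	if (ms < min_ms)
		ms = min_ms;
	else if (ms > max_ms)
		ms = max_ms;
	return msecs_to_jiffies_model(ms);
}

int main(void)
{
	unsigned long fifo_expire = store_tunable("250", 0, 60000);

	/* the SHOW_FUNCTION side converts back to msecs for the user */
	printf("stored %lu jiffies, shown as %lu ms\n",
	       fifo_expire, jiffies_to_msecs_model(fifo_expire));
	return 0;
}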
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 068f4eae0b5c..cf7fc7609e67 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -22,96 +22,216 @@
#include <linux/rbtree.h>
#include <linux/mempool.h>
+static unsigned long max_elapsed_crq;
+static unsigned long max_elapsed_dispatch;
+
/*
* tunables
*/
-static int cfq_quantum = 4;
-static int cfq_queued = 8;
+static int cfq_quantum = 4; /* max queue in one round of service */
+static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
+static int cfq_service = HZ; /* period over which service is avg */
+static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */
+static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */
+static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */
+static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
+static int cfq_back_penalty = 2; /* penalty of a backwards seek */
+/*
+ * for the hash of cfqq inside the cfqd
+ */
#define CFQ_QHASH_SHIFT 6
#define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
-#define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash)
+#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
-#define CFQ_MHASH_SHIFT 8
+/*
+ * for the hash of crq inside the cfqq
+ */
+#define CFQ_MHASH_SHIFT 6
#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec) (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT))
-#define ON_MHASH(crq) !list_empty(&(crq)->hash)
+#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr) list_entry((ptr), struct cfq_rq, hash)
+#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
-#define RQ_DATA(rq) ((struct cfq_rq *) (rq)->elevator_private)
+#define RQ_DATA(rq) (rq)->elevator_private
+
+/*
+ * rb-tree defines
+ */
+#define RB_NONE (2)
+#define RB_EMPTY(node) ((node)->rb_node == NULL)
+#define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE
+#define RB_CLEAR(node) do { \
+ (node)->rb_parent = NULL; \
+ RB_CLEAR_COLOR((node)); \
+ (node)->rb_right = NULL; \
+ (node)->rb_left = NULL; \
+} while (0)
+#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
+#define ON_RB(node) ((node)->rb_color != RB_NONE)
+#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
+#define rq_rb_key(rq) (rq)->sector
+
+/*
+ * threshold for switching off non-tag accounting
+ */
+#define CFQ_MAX_TAG (4)
+
+/*
+ * sort key types and names
+ */
+enum {
+ CFQ_KEY_PGID,
+ CFQ_KEY_TGID,
+ CFQ_KEY_UID,
+ CFQ_KEY_GID,
+ CFQ_KEY_LAST,
+};
+
+static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
+
+/*
+ * spare queue
+ */
+#define CFQ_KEY_SPARE (~0UL)
static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
-static mempool_t *cfq_mpool;
+static kmem_cache_t *cfq_ioc_pool;
struct cfq_data {
struct list_head rr_list;
- struct list_head *dispatch;
- struct list_head *cfq_hash;
+ struct list_head empty_list;
- struct list_head *crq_hash;
+ struct hlist_head *cfq_hash;
+ struct hlist_head *crq_hash;
+ /* queues on rr_list (ie they have pending requests */
unsigned int busy_queues;
+
unsigned int max_queued;
+ atomic_t ref;
+
+ int key_type;
+
mempool_t *crq_pool;
request_queue_t *queue;
+ sector_t last_sector;
+
+ int rq_in_driver;
+
/*
- * tunables
+ * tunables, see top of file
*/
unsigned int cfq_quantum;
unsigned int cfq_queued;
+ unsigned int cfq_fifo_expire_r;
+ unsigned int cfq_fifo_expire_w;
+ unsigned int cfq_fifo_batch_expire;
+ unsigned int cfq_back_penalty;
+ unsigned int cfq_back_max;
+ unsigned int find_best_crq;
+
+ unsigned int cfq_tagged;
};
struct cfq_queue {
- struct list_head cfq_hash;
+ /* reference count */
+ atomic_t ref;
+ /* parent cfq_data */
+ struct cfq_data *cfqd;
+ /* hash of mergeable requests */
+ struct hlist_node cfq_hash;
+ /* hash key */
+ unsigned long key;
+ /* whether queue is on rr (or empty) list */
+ int on_rr;
+ /* on either rr or empty list of cfqd */
struct list_head cfq_list;
+ /* sorted list of pending requests */
struct rb_root sort_list;
- int pid;
+ /* if fifo isn't expired, next request to serve */
+ struct cfq_rq *next_crq;
+ /* requests queued in sort_list */
int queued[2];
-#if 0
- /*
- * with a simple addition like this, we can do io priorities. almost.
- * does need a split request free list, too.
- */
- int io_prio
-#endif
+ /* currently allocated requests */
+ int allocated[2];
+ /* fifo list of requests in sort_list */
+ struct list_head fifo[2];
+ /* last time fifo expired */
+ unsigned long last_fifo_expire;
+
+ int key_type;
+
+ unsigned long service_start;
+ unsigned long service_used;
+
+ unsigned int max_rate;
+
+ /* number of requests that have been handed to the driver */
+ int in_flight;
+ /* number of currently allocated requests */
+ int alloc_limit[2];
};
struct cfq_rq {
struct rb_node rb_node;
sector_t rb_key;
-
struct request *request;
+ struct hlist_node hash;
struct cfq_queue *cfq_queue;
+ struct cfq_io_context *io_context;
+
+ unsigned long service_start;
+ unsigned long queue_start;
- struct list_head hash;
+ unsigned int in_flight : 1;
+ unsigned int accounted : 1;
+ unsigned int is_sync : 1;
+ unsigned int is_write : 1;
};
-static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
-static void cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_rq *crq);
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
+static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_update_next_crq(struct cfq_rq *);
+static void cfq_put_cfqd(struct cfq_data *cfqd);
/*
- * lots of deadline iosched dupes, can be abstracted later...
+ * what the fairness is based on (ie how processes are grouped and
+ * differentiated)
*/
-static inline void __cfq_del_crq_hash(struct cfq_rq *crq)
+static inline unsigned long
+cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk)
{
- list_del_init(&crq->hash);
+ /*
+ * optimize this so that ->key_type is the offset into the struct
+ */
+ switch (cfqd->key_type) {
+ case CFQ_KEY_PGID:
+ return process_group(tsk);
+ default:
+ case CFQ_KEY_TGID:
+ return tsk->tgid;
+ case CFQ_KEY_UID:
+ return tsk->uid;
+ case CFQ_KEY_GID:
+ return tsk->gid;
+ }
}
+/*
+ * lots of deadline iosched dupes, can be abstracted later...
+ */
static inline void cfq_del_crq_hash(struct cfq_rq *crq)
{
- if (ON_MHASH(crq))
- __cfq_del_crq_hash(crq);
+ hlist_del_init(&crq->hash);
}
static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
@@ -120,32 +240,32 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
if (q->last_merge == crq->request)
q->last_merge = NULL;
+
+ cfq_update_next_crq(crq);
}
static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
- struct request *rq = crq->request;
+ const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
- BUG_ON(ON_MHASH(crq));
+ BUG_ON(!hlist_unhashed(&crq->hash));
- list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]);
+ hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
}
static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
{
- struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
- struct list_head *entry, *next = hash_list->next;
+ struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
+ struct hlist_node *entry, *next;
- while ((entry = next) != hash_list) {
+ hlist_for_each_safe(entry, next, hash_list) {
struct cfq_rq *crq = list_entry_hash(entry);
struct request *__rq = crq->request;
- next = entry->next;
-
- BUG_ON(!ON_MHASH(crq));
+ BUG_ON(hlist_unhashed(&crq->hash));
if (!rq_mergeable(__rq)) {
- __cfq_del_crq_hash(crq);
+ cfq_del_crq_hash(crq);
continue;
}
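
cfq_add_crq_hash() and cfq_find_rq_hash() above keep queued requests in a small hash keyed by the sector just past each request's end (rq_hash_key() is sector + nr_sectors), so a bio that starts exactly where an existing request ends can be found quickly for a back merge. A self-contained model of that lookup; the array-backed buckets and struct fields are illustrative, not the kernel's hlist machinery:

#include <stdio.h>

#define MHASH_ENTRIES 64

struct req {
	unsigned long sector;
	unsigned long nr_sectors;
	struct req *hash_next;
};

static struct req *mhash[MHASH_ENTRIES];

/* key a request by the sector just past its end, like rq_hash_key() */
static unsigned long hash_key(const struct req *rq)
{
	return rq->sector + rq->nr_sectors;
}

static unsigned int hash_idx(unsigned long key)
{
	return (key >> 3) % MHASH_ENTRIES;	/* models CFQ_MHASH_BLOCK/FN */
}

static void add_req_hash(struct req *rq)
{
	unsigned int idx = hash_idx(hash_key(rq));

	rq->hash_next = mhash[idx];
	mhash[idx] = rq;
}

/* find a request whose end matches where a new bio starts */
static struct req *find_back_merge(unsigned long bio_start)
{
	for (struct req *rq = mhash[hash_idx(bio_start)]; rq; rq = rq->hash_next)
		if (hash_key(rq) == bio_start)
			return rq;
	return NULL;
}

int main(void)
{
	struct req a = { .sector = 1000, .nr_sectors = 8 };

	add_req_hash(&a);
	printf("bio at 1008 back-merges: %s\n", find_back_merge(1008) ? "yes" : "no");
	printf("bio at 1016 back-merges: %s\n", find_back_merge(1016) ? "yes" : "no");
	return 0;
}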
@@ -157,29 +277,257 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
}
/*
- * rb tree support functions
+ * Lifted from AS - choose which of crq1 and crq2 that is best served now.
+ * We choose the request that is closest to the head right now. Distance
+ * behind the head are penalized and only allowed to a certain extent.
*/
-#define RB_NONE (2)
-#define RB_EMPTY(node) ((node)->rb_node == NULL)
-#define RB_CLEAR(node) ((node)->rb_color = RB_NONE)
-#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
-#define ON_RB(node) ((node)->rb_color != RB_NONE)
-#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
-#define rq_rb_key(rq) (rq)->sector
+static struct cfq_rq *
+cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
+{
+ sector_t last, s1, s2, d1 = 0, d2 = 0;
+ int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */
+ unsigned long back_max;
+
+ if (crq1 == NULL || crq1 == crq2)
+ return crq2;
+ if (crq2 == NULL)
+ return crq1;
-static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+ s1 = crq1->request->sector;
+ s2 = crq2->request->sector;
+
+ last = cfqd->last_sector;
+
+#if 0
+ if (!list_empty(&cfqd->queue->queue_head)) {
+ struct list_head *entry = &cfqd->queue->queue_head;
+ unsigned long distance = ~0UL;
+ struct request *rq;
+
+ while ((entry = entry->prev) != &cfqd->queue->queue_head) {
+ rq = list_entry_rq(entry);
+
+ if (blk_barrier_rq(rq))
+ break;
+
+ if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
+ distance = abs(s1 - rq->sector +rq->nr_sectors);
+ last = rq->sector + rq->nr_sectors;
+ }
+ if (distance < abs(s2 - rq->sector + rq->nr_sectors)) {
+ distance = abs(s2 - rq->sector +rq->nr_sectors);
+ last = rq->sector + rq->nr_sectors;
+ }
+ }
+ }
+#endif
+
+ /*
+ * by definition, 1KiB is 2 sectors
+ */
+ back_max = cfqd->cfq_back_max * 2;
+
+ /*
+ * Strict one way elevator _except_ in the case where we allow
+ * short backward seeks which are biased as twice the cost of a
+ * similar forward seek.
+ */
+ if (s1 >= last)
+ d1 = s1 - last;
+ else if (s1 + back_max >= last)
+ d1 = (last - s1) * cfqd->cfq_back_penalty;
+ else
+ r1_wrap = 1;
+
+ if (s2 >= last)
+ d2 = s2 - last;
+ else if (s2 + back_max >= last)
+ d2 = (last - s2) * cfqd->cfq_back_penalty;
+ else
+ r2_wrap = 1;
+
+ /* Found required data */
+ if (!r1_wrap && r2_wrap)
+ return crq1;
+ else if (!r2_wrap && r1_wrap)
+ return crq2;
+ else if (r1_wrap && r2_wrap) {
+ /* both behind the head */
+ if (s1 <= s2)
+ return crq1;
+ else
+ return crq2;
+ }
+
+ /* Both requests in front of the head */
+ if (d1 < d2)
+ return crq1;
+ else if (d2 < d1)
+ return crq2;
+ else {
+ if (s1 >= s2)
+ return crq1;
+ else
+ return crq2;
+ }
+}
+
+/*
+ * would be nice to take fifo expire time into account as well
+ */
+static struct cfq_rq *
+cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct cfq_rq *last)
+{
+ struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
+ struct rb_node *rbnext, *rbprev;
+
+ if (!ON_RB(&last->rb_node))
+ return NULL;
+
+ if ((rbnext = rb_next(&last->rb_node)) == NULL)
+ rbnext = rb_first(&cfqq->sort_list);
+
+ rbprev = rb_prev(&last->rb_node);
+
+ if (rbprev)
+ crq_prev = rb_entry_crq(rbprev);
+ if (rbnext)
+ crq_next = rb_entry_crq(rbnext);
+
+ return cfq_choose_req(cfqd, crq_next, crq_prev);
+}
+
+static void cfq_update_next_crq(struct cfq_rq *crq)
{
+ struct cfq_queue *cfqq = crq->cfq_queue;
+
+ if (cfqq->next_crq == crq)
+ cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
+}
+
+static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+{
+ struct list_head *head = &cfqq->cfqd->rr_list;
+ struct list_head *next, *prev;
+
+ /*
+ * list might still be ordered
+ */
+ next = cfqq->cfq_list.next;
+ if (next != head) {
+ struct cfq_queue *cnext = list_entry_cfqq(next);
+
+ if (cfqq->service_used > cnext->service_used)
+ return 1;
+ }
+
+ prev = cfqq->cfq_list.prev;
+ if (prev != head) {
+ struct cfq_queue *cprev = list_entry_cfqq(prev);
+
+ if (cfqq->service_used < cprev->service_used)
+ return 1;
+ }
+
+ return 0;
+}
+
+static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
+{
+ struct list_head *entry = &cfqq->cfqd->rr_list;
+
+ if (!cfqq->on_rr)
+ return;
+ if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+ return;
+
+ list_del(&cfqq->cfq_list);
+
+ /*
+ * sort by our mean service_used, sub-sort by in-flight requests
+ */
+ while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
+ struct cfq_queue *__cfqq = list_entry_cfqq(entry);
+
+ if (cfqq->service_used > __cfqq->service_used)
+ break;
+ else if (cfqq->service_used == __cfqq->service_used) {
+ struct list_head *prv;
+
+ while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
+ __cfqq = list_entry_cfqq(prv);
+
+ WARN_ON(__cfqq->service_used > cfqq->service_used);
+ if (cfqq->service_used != __cfqq->service_used)
+ break;
+ if (cfqq->in_flight > __cfqq->in_flight)
+ break;
+
+ entry = prv;
+ }
+ }
+ }
+
+ list_add(&cfqq->cfq_list, entry);
+}
+
+/*
+ * add to busy list of queues for service, trying to be fair in ordering
+ * the pending list according to requests serviced
+ */
+static inline void
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ /*
+ * it's currently on the empty list
+ */
+ cfqq->on_rr = 1;
+ cfqd->busy_queues++;
+
+ if (time_after(jiffies, cfqq->service_start + cfq_service))
+ cfqq->service_used >>= 3;
+
+ cfq_sort_rr_list(cfqq, 1);
+}
+
+static inline void
+cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ list_move(&cfqq->cfq_list, &cfqd->empty_list);
+ cfqq->on_rr = 0;
+
+ BUG_ON(!cfqd->busy_queues);
+ cfqd->busy_queues--;
+}
+
+/*
+ * rb tree support functions
+ */
+static inline void cfq_del_crq_rb(struct cfq_rq *crq)
+{
+ struct cfq_queue *cfqq = crq->cfq_queue;
+
if (ON_RB(&crq->rb_node)) {
- cfqq->queued[rq_data_dir(crq->request)]--;
+ struct cfq_data *cfqd = cfqq->cfqd;
+
+ BUG_ON(!cfqq->queued[crq->is_sync]);
+
+ cfq_update_next_crq(crq);
+
+ cfqq->queued[crq->is_sync]--;
rb_erase(&crq->rb_node, &cfqq->sort_list);
- crq->cfq_queue = NULL;
+ RB_CLEAR_COLOR(&crq->rb_node);
+
+ if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
+ cfq_del_cfqq_rr(cfqd, cfqq);
}
}
static struct cfq_rq *
-__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+__cfq_add_crq_rb(struct cfq_rq *crq)
{
- struct rb_node **p = &cfqq->sort_list.rb_node;
+ struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
struct rb_node *parent = NULL;
struct cfq_rq *__crq;
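
cfq_choose_req() in the hunk above picks whichever candidate is cheapest to reach from the last dispatched sector: forward distance counts as-is, a backward seek within cfq_back_max KiB costs cfq_back_penalty times the distance, and anything further behind is treated as wrapped and loses to any non-wrapped request. A compact user-space model of that rule, using the default tunables from the top of the file (tie handling is simplified):

#include <stdio.h>

typedef unsigned long long sector_t;

#define CFQ_BACK_MAX_KIB	(16 * 1024)	/* default cfq_back_max */
#define CFQ_BACK_PENALTY	2		/* default cfq_back_penalty */

/*
 * Returns 1 if s1 should be served before s2, given the last dispatched
 * sector: forward distance as-is, short backward seeks at penalty * distance,
 * everything beyond back_max counted as a wrap (ties simplified).
 */
static int closer_to_head(sector_t last, sector_t s1, sector_t s2)
{
	sector_t back_max = CFQ_BACK_MAX_KIB * 2;	/* 1 KiB == 2 sectors */
	sector_t d1 = 0, d2 = 0;
	int wrap1 = 0, wrap2 = 0;

	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * CFQ_BACK_PENALTY;
	else
		wrap1 = 1;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * CFQ_BACK_PENALTY;
	else
		wrap2 = 1;

	if (wrap1 != wrap2)
		return !wrap1;		/* the non-wrapped request wins */
	if (wrap1 && wrap2)
		return s1 <= s2;	/* both behind: pick the lower sector */
	return d1 <= d2;		/* both costed: smaller distance wins */
}

int main(void)
{
	sector_t head = 100000;

	/* 512 sectors ahead vs 512 sectors behind: behind costs 2x, ahead wins */
	printf("ahead beats just-behind: %d\n",
	       closer_to_head(head, head + 512, head - 512));
	/* beyond back_max behind the head loses even to a distant forward request */
	printf("forward beats far-behind: %d\n",
	       closer_to_head(head, head + 30000, head - 40000));
	return 0;
}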
@@ -199,30 +547,50 @@ __cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
return NULL;
}
-static void
-cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
+static void cfq_add_crq_rb(struct cfq_rq *crq)
{
+ struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_data *cfqd = cfqq->cfqd;
struct request *rq = crq->request;
struct cfq_rq *__alias;
crq->rb_key = rq_rb_key(rq);
- cfqq->queued[rq_data_dir(rq)]++;
-retry:
- __alias = __cfq_add_crq_rb(cfqq, crq);
- if (!__alias) {
- rb_insert_color(&crq->rb_node, &cfqq->sort_list);
- crq->cfq_queue = cfqq;
- return;
+ cfqq->queued[crq->is_sync]++;
+
+ /*
+ * looks a little odd, but the first insert might return an alias.
+ * if that happens, put the alias on the dispatch list
+ */
+ while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
+ cfq_dispatch_sort(cfqd->queue, __alias);
+
+ rb_insert_color(&crq->rb_node, &cfqq->sort_list);
+
+ if (!cfqq->on_rr)
+ cfq_add_cfqq_rr(cfqd, cfqq);
+
+ /*
+ * check if this request is a better next-serve candidate
+ */
+ cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
+}
+
+static inline void
+cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+{
+ if (ON_RB(&crq->rb_node)) {
+ rb_erase(&crq->rb_node, &cfqq->sort_list);
+ cfqq->queued[crq->is_sync]--;
}
- cfq_dispatch_sort(cfqd, cfqq, __alias);
- goto retry;
+ cfq_add_crq_rb(crq);
}
static struct request *
cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
{
- struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
+ const unsigned long key = cfq_hash_key(cfqd, current);
+ struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key);
struct rb_node *n;
if (!cfqq)
@@ -244,30 +612,44 @@ out:
return NULL;
}
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+/*
+ * make sure the service time gets corrected on reissue of this request
+ */
+static void cfq_requeue_request(request_queue_t *q, struct request *rq)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
struct cfq_queue *cfqq = crq->cfq_queue;
+ if (cfqq->cfqd->cfq_tagged) {
+ cfqq->service_used--;
+ cfq_sort_rr_list(cfqq, 0);
+ }
+
+ crq->accounted = 0;
+ cfqq->cfqd->rq_in_driver--;
+ }
+ list_add(&rq->queuelist, &q->queue_head);
+}
+
+static void cfq_remove_request(request_queue_t *q, struct request *rq)
+{
+ struct cfq_rq *crq = RQ_DATA(rq);
+
+ if (crq) {
cfq_remove_merge_hints(q, crq);
list_del_init(&rq->queuelist);
- if (cfqq) {
- cfq_del_crq_rb(cfqq, crq);
-
- if (RB_EMPTY(&cfqq->sort_list))
- cfq_put_queue(cfqd, cfqq);
- }
+ if (crq->cfq_queue)
+ cfq_del_crq_rb(crq);
}
}
static int
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct request *__rq;
int ret;
@@ -305,7 +687,7 @@ out_insert:
static void cfq_merged_request(request_queue_t *q, struct request *req)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_rq *crq = RQ_DATA(req);
cfq_del_crq_hash(crq);
@@ -314,193 +696,546 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
struct cfq_queue *cfqq = crq->cfq_queue;
- cfq_del_crq_rb(cfqq, crq);
- cfq_add_crq_rb(cfqd, cfqq, crq);
+ cfq_update_next_crq(crq);
+ cfq_reposition_crq_rb(cfqq, crq);
}
q->last_merge = req;
}
static void
-cfq_merged_requests(request_queue_t *q, struct request *req,
+cfq_merged_requests(request_queue_t *q, struct request *rq,
struct request *next)
{
- cfq_merged_request(q, req);
+ struct cfq_rq *crq = RQ_DATA(rq);
+ struct cfq_rq *cnext = RQ_DATA(next);
+
+ cfq_merged_request(q, rq);
+
+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
+ if (time_before(cnext->queue_start, crq->queue_start)) {
+ list_move(&rq->queuelist, &next->queuelist);
+ crq->queue_start = cnext->queue_start;
+ }
+ }
+
+ cfq_update_next_crq(cnext);
cfq_remove_request(q, next);
}
-static void
-cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_rq *crq)
+/*
+ * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
+ * this function sector sorts the selected request to minimize seeks. we start
+ * at cfqd->last_sector, not 0.
+ */
+static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
{
- struct list_head *head = cfqd->dispatch, *entry = head;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+ struct cfq_queue *cfqq = crq->cfq_queue;
+ struct list_head *head = &q->queue_head, *entry = head;
struct request *__rq;
+ sector_t last;
- cfq_del_crq_rb(cfqq, crq);
- cfq_remove_merge_hints(cfqd->queue, crq);
+ cfq_del_crq_rb(crq);
+ cfq_remove_merge_hints(q, crq);
+ list_del(&crq->request->queuelist);
- if (!list_empty(head)) {
- __rq = list_entry_rq(head->next);
+ last = cfqd->last_sector;
+ while ((entry = entry->prev) != head) {
+ __rq = list_entry_rq(entry);
- if (crq->request->sector < __rq->sector) {
- entry = head->prev;
- goto link;
+ if (blk_barrier_rq(crq->request))
+ break;
+ if (!blk_fs_request(crq->request))
+ break;
+
+ if (crq->request->sector > __rq->sector)
+ break;
+ if (__rq->sector > last && crq->request->sector < last) {
+ last = crq->request->sector;
+ break;
}
}
- while ((entry = entry->prev) != head) {
- __rq = list_entry_rq(entry);
+ cfqd->last_sector = last;
+ crq->in_flight = 1;
+ cfqq->in_flight++;
+ list_add(&crq->request->queuelist, entry);
+}
- if (crq->request->sector <= __rq->sector)
- break;
+/*
+ * return expired entry, or NULL to just start from scratch in rbtree
+ */
+static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
+{
+ struct cfq_data *cfqd = cfqq->cfqd;
+ const int reads = !list_empty(&cfqq->fifo[0]);
+ const int writes = !list_empty(&cfqq->fifo[1]);
+ unsigned long now = jiffies;
+ struct cfq_rq *crq;
+
+ if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
+ return NULL;
+
+ crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
+ if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
+ cfqq->last_fifo_expire = now;
+ return crq;
+ }
+
+ crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
+ if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
+ cfqq->last_fifo_expire = now;
+ return crq;
}
-link:
- list_add_tail(&crq->request->queuelist, entry);
+ return NULL;
}
+/*
+ * dispatch a single request from given queue
+ */
static inline void
-__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
+cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
{
- struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list));
+ struct cfq_rq *crq;
+
+ /*
+ * follow expired path, else get first next available
+ */
+ if ((crq = cfq_check_fifo(cfqq)) == NULL) {
+ if (cfqd->find_best_crq)
+ crq = cfqq->next_crq;
+ else
+ crq = rb_entry_crq(rb_first(&cfqq->sort_list));
+ }
+
+ cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
- cfq_dispatch_sort(cfqd, cfqq, crq);
+ /*
+ * finally, insert request into driver list
+ */
+ cfq_dispatch_sort(q, crq);
}
-static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd)
+static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
{
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq;
struct list_head *entry, *tmp;
- int ret, queued, good_queues;
+ int queued, busy_queues, first_round;
if (list_empty(&cfqd->rr_list))
return 0;
- queued = ret = 0;
+ queued = 0;
+ first_round = 1;
restart:
- good_queues = 0;
+ busy_queues = 0;
list_for_each_safe(entry, tmp, &cfqd->rr_list) {
- cfqq = list_entry_cfqq(cfqd->rr_list.next);
+ cfqq = list_entry_cfqq(entry);
BUG_ON(RB_EMPTY(&cfqq->sort_list));
- __cfq_dispatch_requests(q, cfqd, cfqq);
+ /*
+ * first round of queueing, only select from queues that
+ * don't already have io in-flight
+ */
+ if (first_round && cfqq->in_flight)
+ continue;
- if (RB_EMPTY(&cfqq->sort_list))
- cfq_put_queue(cfqd, cfqq);
- else
- good_queues++;
+ cfq_dispatch_request(q, cfqd, cfqq);
+
+ if (!RB_EMPTY(&cfqq->sort_list))
+ busy_queues++;
queued++;
- ret = 1;
}
- if ((queued < cfqd->cfq_quantum) && good_queues)
+ if ((queued < max_dispatch) && (busy_queues || first_round)) {
+ first_round = 0;
goto restart;
+ }
- return ret;
+ return queued;
+}
+
+static inline void cfq_account_dispatch(struct cfq_rq *crq)
+{
+ struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_data *cfqd = cfqq->cfqd;
+ unsigned long now, elapsed;
+
+ /*
+ * accounted bit is necessary since some drivers will call
+ * elv_next_request() many times for the same request (eg ide)
+ */
+ if (crq->accounted)
+ return;
+
+ now = jiffies;
+ if (cfqq->service_start == ~0UL)
+ cfqq->service_start = now;
+
+ /*
+ * on drives with tagged command queueing, command turn-around time
+ * doesn't necessarily reflect the time spent processing this very
+ * command inside the drive. so do the accounting differently there,
+ * by just sorting on the number of requests
+ */
+ if (cfqd->cfq_tagged) {
+ if (time_after(now, cfqq->service_start + cfq_service)) {
+ cfqq->service_start = now;
+ cfqq->service_used /= 10;
+ }
+
+ cfqq->service_used++;
+ cfq_sort_rr_list(cfqq, 0);
+ }
+
+ elapsed = now - crq->queue_start;
+ if (elapsed > max_elapsed_dispatch)
+ max_elapsed_dispatch = elapsed;
+
+ crq->accounted = 1;
+ crq->service_start = now;
+
+ if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
+ cfqq->cfqd->cfq_tagged = 1;
+ printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
+ }
+}
+
+static inline void
+cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
+{
+ struct cfq_data *cfqd = cfqq->cfqd;
+
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
+
+ if (!cfqd->cfq_tagged) {
+ unsigned long now = jiffies;
+ unsigned long duration = now - crq->service_start;
+
+ if (time_after(now, cfqq->service_start + cfq_service)) {
+ cfqq->service_start = now;
+ cfqq->service_used >>= 3;
+ }
+
+ cfqq->service_used += duration;
+ cfq_sort_rr_list(cfqq, 0);
+
+ if (duration > max_elapsed_crq)
+ max_elapsed_crq = duration;
+ }
}
static struct request *cfq_next_request(request_queue_t *q)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct request *rq;
- if (!list_empty(cfqd->dispatch)) {
+ if (!list_empty(&q->queue_head)) {
struct cfq_rq *crq;
dispatch:
- rq = list_entry_rq(cfqd->dispatch->next);
+ rq = list_entry_rq(q->queue_head.next);
- crq = RQ_DATA(rq);
- if (crq)
+ if ((crq = RQ_DATA(rq)) != NULL) {
cfq_remove_merge_hints(q, crq);
+ cfq_account_dispatch(crq);
+ }
return rq;
}
- if (cfq_dispatch_requests(q, cfqd))
+ if (cfq_dispatch_requests(q, cfqd->cfq_quantum))
goto dispatch;
return NULL;
}
+/*
+ * task holds one reference to the queue, dropped when task exits. each crq
+ * in-flight on this queue also holds a reference, dropped when crq is freed.
+ *
+ * queue lock must be held here.
+ */
+static void cfq_put_queue(struct cfq_queue *cfqq)
+{
+ BUG_ON(!atomic_read(&cfqq->ref));
+
+ if (!atomic_dec_and_test(&cfqq->ref))
+ return;
+
+ BUG_ON(rb_first(&cfqq->sort_list));
+ BUG_ON(cfqq->on_rr);
+
+ cfq_put_cfqd(cfqq->cfqd);
+
+ /*
+ * it's on the empty list and still hashed
+ */
+ list_del(&cfqq->cfq_list);
+ hlist_del(&cfqq->cfq_hash);
+ kmem_cache_free(cfq_pool, cfqq);
+}
+
static inline struct cfq_queue *
-__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval)
+__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval)
{
- struct list_head *hash_list = &cfqd->cfq_hash[hashval];
- struct list_head *entry;
+ struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
+ struct hlist_node *entry, *next;
- list_for_each(entry, hash_list) {
+ hlist_for_each_safe(entry, next, hash_list) {
struct cfq_queue *__cfqq = list_entry_qhash(entry);
- if (__cfqq->pid == pid)
+ if (__cfqq->key == key)
return __cfqq;
}
return NULL;
}
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid)
+static struct cfq_queue *
+cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key)
{
- const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
+ return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT));
+}
+
+static inline void
+cfq_rehash_cfqq(struct cfq_data *cfqd, struct cfq_queue **cfqq,
+ struct cfq_io_context *cic)
+{
+ unsigned long hashkey = cfq_hash_key(cfqd, current);
+ unsigned long hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
+ struct cfq_queue *__cfqq;
+ unsigned long flags;
+
+ spin_lock_irqsave(cfqd->queue->queue_lock, flags);
- return __cfq_find_cfq_hash(cfqd, pid, hashval);
+ hlist_del(&(*cfqq)->cfq_hash);
+
+ __cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval);
+ if (!__cfqq || __cfqq == *cfqq) {
+ __cfqq = *cfqq;
+ hlist_add_head(&__cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
+ __cfqq->key_type = cfqd->key_type;
+ } else {
+ atomic_inc(&__cfqq->ref);
+ cic->cfqq = __cfqq;
+ cfq_put_queue(*cfqq);
+ *cfqq = __cfqq;
+ }
+
+ cic->cfqq = __cfqq;
+ spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
}
-static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static void cfq_free_io_context(struct cfq_io_context *cic)
{
- cfqd->busy_queues--;
- list_del(&cfqq->cfq_list);
- list_del(&cfqq->cfq_hash);
- mempool_free(cfqq, cfq_mpool);
+ kmem_cache_free(cfq_ioc_pool, cic);
}
-static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int pid,
- int gfp_mask)
+/*
+ * locking hierarchy is: io_context lock -> queue locks
+ */
+static void cfq_exit_io_context(struct cfq_io_context *cic)
{
- const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
+ struct cfq_queue *cfqq = cic->cfqq;
+ struct list_head *entry = &cic->list;
+ request_queue_t *q;
+ unsigned long flags;
+
+ /*
+ * put the reference this task is holding to the various queues
+ */
+ spin_lock_irqsave(&cic->ioc->lock, flags);
+ while ((entry = cic->list.next) != &cic->list) {
+ struct cfq_io_context *__cic;
+
+ __cic = list_entry(entry, struct cfq_io_context, list);
+ list_del(entry);
+
+ q = __cic->cfqq->cfqd->queue;
+ spin_lock(q->queue_lock);
+ cfq_put_queue(__cic->cfqq);
+ spin_unlock(q->queue_lock);
+ }
+
+ q = cfqq->cfqd->queue;
+ spin_lock(q->queue_lock);
+ cfq_put_queue(cfqq);
+ spin_unlock(q->queue_lock);
+
+ cic->cfqq = NULL;
+ spin_unlock_irqrestore(&cic->ioc->lock, flags);
+}
+
+static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags)
+{
+ struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_flags);
+
+ if (cic) {
+ cic->dtor = cfq_free_io_context;
+ cic->exit = cfq_exit_io_context;
+ INIT_LIST_HEAD(&cic->list);
+ cic->cfqq = NULL;
+ }
+
+ return cic;
+}
+
+/*
+ * Setup general io context and cfq io context. There can be several cfq
+ * io contexts per general io context, if this process is doing io to more
+ * than one device managed by cfq. Note that caller is holding a reference to
+ * cfqq, so we don't need to worry about it disappearing
+ */
+static struct cfq_io_context *
+cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
+{
+ struct cfq_data *cfqd = (*cfqq)->cfqd;
+ struct cfq_queue *__cfqq = *cfqq;
+ struct cfq_io_context *cic;
+ struct io_context *ioc;
+
+ might_sleep_if(gfp_flags & __GFP_WAIT);
+
+ ioc = get_io_context(gfp_flags);
+ if (!ioc)
+ return NULL;
+
+ if ((cic = ioc->cic) == NULL) {
+ cic = cfq_alloc_io_context(gfp_flags);
+
+ if (cic == NULL)
+ goto err;
+
+ ioc->cic = cic;
+ cic->ioc = ioc;
+ cic->cfqq = __cfqq;
+ atomic_inc(&__cfqq->ref);
+ } else {
+ struct cfq_io_context *__cic;
+ unsigned long flags;
+
+ /*
+ * since the first cic on the list is actually the head
+ * itself, need to check this here or we'll duplicate an
+ * cic per ioc for no reason
+ */
+ if (cic->cfqq == __cfqq)
+ goto out;
+
+ /*
+ * cic exists, check if we already are there. linear search
+ * should be ok here, the list will usually not be more than
+ * 1 or a few entries long
+ */
+ spin_lock_irqsave(&ioc->lock, flags);
+ list_for_each_entry(__cic, &cic->list, list) {
+ /*
+ * this process is already holding a reference to
+ * this queue, so no need to get one more
+ */
+ if (__cic->cfqq == __cfqq) {
+ cic = __cic;
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ goto out;
+ }
+ }
+ spin_unlock_irqrestore(&ioc->lock, flags);
+
+ /*
+	 * nope, process doesn't have a cic associated with this

+ * cfqq yet. get a new one and add to list
+ */
+ __cic = cfq_alloc_io_context(gfp_flags);
+ if (__cic == NULL)
+ goto err;
+
+ __cic->ioc = ioc;
+ __cic->cfqq = __cfqq;
+ atomic_inc(&__cfqq->ref);
+ spin_lock_irqsave(&ioc->lock, flags);
+ list_add(&__cic->list, &cic->list);
+ spin_unlock_irqrestore(&ioc->lock, flags);
+
+ cic = __cic;
+ *cfqq = __cfqq;
+ }
+
+out:
+ /*
+ * if key_type has been changed on the fly, we lazily rehash
+ * each queue at lookup time
+ */
+ if ((*cfqq)->key_type != cfqd->key_type)
+ cfq_rehash_cfqq(cfqd, cfqq, cic);
+
+ return cic;
+err:
+ put_io_context(ioc);
+ return NULL;
+}
+
+static struct cfq_queue *
+__cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask)
+{
+ const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
struct cfq_queue *cfqq, *new_cfqq = NULL;
- request_queue_t *q = cfqd->queue;
retry:
- cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval);
+ cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
if (!cfqq) {
if (new_cfqq) {
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
- spin_unlock_irq(q->queue_lock);
- new_cfqq = mempool_alloc(cfq_mpool, gfp_mask);
- spin_lock_irq(q->queue_lock);
+ spin_unlock_irq(cfqd->queue->queue_lock);
+ new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+ spin_lock_irq(cfqd->queue->queue_lock);
goto retry;
} else
- return NULL;
+ goto out;
+
+ memset(cfqq, 0, sizeof(*cfqq));
- INIT_LIST_HEAD(&cfqq->cfq_hash);
+ INIT_HLIST_NODE(&cfqq->cfq_hash);
INIT_LIST_HEAD(&cfqq->cfq_list);
RB_CLEAR_ROOT(&cfqq->sort_list);
-
- cfqq->pid = pid;
- cfqq->queued[0] = cfqq->queued[1] = 0;
- list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
+ INIT_LIST_HEAD(&cfqq->fifo[0]);
+ INIT_LIST_HEAD(&cfqq->fifo[1]);
+
+ cfqq->key = key;
+ hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
+ atomic_set(&cfqq->ref, 0);
+ cfqq->cfqd = cfqd;
+ atomic_inc(&cfqd->ref);
+ cfqq->key_type = cfqd->key_type;
+ cfqq->service_start = ~0UL;
}
if (new_cfqq)
- mempool_free(new_cfqq, cfq_mpool);
+ kmem_cache_free(cfq_pool, new_cfqq);
+ atomic_inc(&cfqq->ref);
+out:
+ WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq;
}
-static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid,
- int gfp_mask)
+static struct cfq_queue *
+cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask)
{
request_queue_t *q = cfqd->queue;
struct cfq_queue *cfqq;
spin_lock_irq(q->queue_lock);
- cfqq = __cfq_get_queue(cfqd, pid, gfp_mask);
+ cfqq = __cfq_get_queue(cfqd, key, gfp_mask);
spin_unlock_irq(q->queue_lock);
return cfqq;
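
cfq_check_fifo() in the hunk above gives each queue a per-class deadline: every request records queue_start = jiffies when queued, sync requests expire after cfq_fifo_expire_r (HZ/2 by default), async ones after cfq_fifo_expire_w (5*HZ), and the fifo is consulted at most once per cfq_fifo_batch_expire (HZ/8) so expiry can't completely displace the sector-sorted service. A simplified user-space model of that decision (it ignores jiffies wraparound and the empty-list checks of the real code):

#include <stdio.h>

#define HZ 1000				/* stand-in tick rate */

/* defaults from the tunables at the top of the cfq diff */
#define FIFO_EXPIRE_SYNC	(HZ / 2)	/* cfq_fifo_expire_r */
#define FIFO_EXPIRE_ASYNC	(5 * HZ)	/* cfq_fifo_expire_w */
#define FIFO_BATCH_EXPIRE	(HZ / 8)	/* cfq_fifo_rate */

struct queue_state {
	unsigned long last_fifo_expire;		/* last time we served from the fifo */
	unsigned long oldest_sync_start;	/* queue_start of head of fifo[sync] */
	unsigned long oldest_async_start;	/* queue_start of head of fifo[async] */
};

/*
 * Serve from the fifo (oldest first) only when the head request has waited
 * past its class timeout, and at most once every FIFO_BATCH_EXPIRE ticks;
 * otherwise fall back to the sector-sorted tree.
 * Returns 1 for "serve sync head", 2 for "serve async head", 0 for "use rbtree".
 */
static int check_fifo(struct queue_state *q, unsigned long now)
{
	if (now < q->last_fifo_expire + FIFO_BATCH_EXPIRE)
		return 0;

	if (now > q->oldest_sync_start + FIFO_EXPIRE_SYNC) {
		q->last_fifo_expire = now;
		return 1;
	}
	if (now > q->oldest_async_start + FIFO_EXPIRE_ASYNC) {
		q->last_fifo_expire = now;
		return 2;
	}
	return 0;
}

int main(void)
{
	struct queue_state q = {
		.last_fifo_expire = 0,
		.oldest_sync_start = 100,
		.oldest_async_start = 100,
	};

	printf("at t=300: %d (sync head not yet expired)\n", check_fifo(&q, 300));
	printf("at t=700: %d (sync head has waited > HZ/2)\n", check_fifo(&q, 700));
	printf("at t=750: %d (rate-limited by the batch expire)\n", check_fifo(&q, 750));
	return 0;
}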
@@ -508,40 +1243,30 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid,
static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
{
- struct cfq_queue *cfqq;
+ crq->is_sync = 0;
+ if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
+ crq->is_sync = 1;
- cfqq = __cfq_get_queue(cfqd, current->tgid, GFP_ATOMIC);
- if (cfqq) {
- cfq_add_crq_rb(cfqd, cfqq, crq);
+ cfq_add_crq_rb(crq);
+ crq->queue_start = jiffies;
- if (list_empty(&cfqq->cfq_list)) {
- list_add(&cfqq->cfq_list, &cfqd->rr_list);
- cfqd->busy_queues++;
- }
- } else {
- /*
- * should can only happen if the request wasn't allocated
- * through blk_alloc_request(), eg stack requests from ide-cd
- * (those should be removed) _and_ we are in OOM.
- */
- list_add_tail(&crq->request->queuelist, cfqd->dispatch);
- }
+ list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
}
static void
cfq_insert_request(request_queue_t *q, struct request *rq, int where)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_rq *crq = RQ_DATA(rq);
switch (where) {
case ELEVATOR_INSERT_BACK:
- while (cfq_dispatch_requests(q, cfqd))
+ while (cfq_dispatch_requests(q, cfqd->cfq_quantum))
;
- list_add_tail(&rq->queuelist, cfqd->dispatch);
+ list_add_tail(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_FRONT:
- list_add(&rq->queuelist, cfqd->dispatch);
+ list_add(&rq->queuelist, &q->queue_head);
break;
case ELEVATOR_INSERT_SORT:
BUG_ON(!blk_fs_request(rq));
@@ -562,12 +1287,27 @@ cfq_insert_request(request_queue_t *q, struct request *rq, int where)
static int cfq_queue_empty(request_queue_t *q)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
- if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list))
- return 1;
+ return list_empty(&q->queue_head) && list_empty(&cfqd->rr_list);
+}
+
+static void cfq_completed_request(request_queue_t *q, struct request *rq)
+{
+ struct cfq_rq *crq = RQ_DATA(rq);
+
+ if (unlikely(!blk_fs_request(rq)))
+ return;
+
+ if (crq->in_flight) {
+ struct cfq_queue *cfqq = crq->cfq_queue;
+
+ WARN_ON(!cfqq->in_flight);
+ cfqq->in_flight--;
+
+ cfq_account_completion(cfqq, crq);
+ }
- return 0;
}
static struct request *
@@ -596,92 +1336,169 @@ cfq_latter_request(request_queue_t *q, struct request *rq)
static int cfq_may_queue(request_queue_t *q, int rw)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq;
- int ret = 1;
+ int ret = ELV_MQUEUE_MAY;
- if (!cfqd->busy_queues)
- goto out;
+ if (current->flags & PF_MEMALLOC)
+ return ELV_MQUEUE_MAY;
- cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
+ cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
if (cfqq) {
- int limit = (q->nr_requests - cfqd->cfq_queued) / cfqd->busy_queues;
+ int limit = cfqd->max_queued;
+
+ if (cfqq->allocated[rw] < cfqd->cfq_queued)
+ return ELV_MQUEUE_MUST;
- if (limit < 3)
- limit = 3;
+ if (cfqd->busy_queues)
+ limit = q->nr_requests / cfqd->busy_queues;
+
+ if (limit < cfqd->cfq_queued)
+ limit = cfqd->cfq_queued;
else if (limit > cfqd->max_queued)
limit = cfqd->max_queued;
- if (cfqq->queued[rw] > limit)
- ret = 0;
+ if (cfqq->allocated[rw] >= limit) {
+ if (limit > cfqq->alloc_limit[rw])
+ cfqq->alloc_limit[rw] = limit;
+
+ ret = ELV_MQUEUE_NO;
+ }
}
-out:
+
return ret;
}
+static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
+{
+ struct request_list *rl = &q->rq;
+ const int write = waitqueue_active(&rl->wait[WRITE]);
+ const int read = waitqueue_active(&rl->wait[READ]);
+
+ if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
+ wake_up(&rl->wait[READ]);
+ if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
+ wake_up(&rl->wait[WRITE]);
+}
+
+/*
+ * queue lock held here
+ */
static void cfq_put_request(request_queue_t *q, struct request *rq)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_rq *crq = RQ_DATA(rq);
- struct request_list *rl;
- int other_rw;
if (crq) {
+ struct cfq_queue *cfqq = crq->cfq_queue;
+
BUG_ON(q->last_merge == rq);
- BUG_ON(ON_MHASH(crq));
+ BUG_ON(!hlist_unhashed(&crq->hash));
+
+ if (crq->io_context)
+ put_io_context(crq->io_context->ioc);
+
+ if (!cfqq->allocated[crq->is_write]) {
+ WARN_ON(1);
+ cfqq->allocated[crq->is_write] = 1;
+ }
+ cfqq->allocated[crq->is_write]--;
mempool_free(crq, cfqd->crq_pool);
rq->elevator_private = NULL;
- }
- /*
- * work-around for may_queue "bug": if a read gets issued and refused
- * to queue because writes ate all the allowed slots and no other
- * reads are pending for this queue, it could get stuck infinitely
- * since freed_request() only checks the waitqueue for writes when
- * freeing them. or vice versa for a single write vs many reads.
- * so check here whether "the other" data direction might be able
- * to queue and wake them
- */
- rl = &q->rq;
- other_rw = rq_data_dir(rq) ^ 1;
- if (rl->count[other_rw] <= q->nr_requests) {
smp_mb();
- if (waitqueue_active(&rl->wait[other_rw]))
- wake_up(&rl->wait[other_rw]);
+ cfq_check_waiters(q, cfqq);
+ cfq_put_queue(cfqq);
}
}
+/*
+ * Allocate cfq data structures associated with this request. A queue and
+ */
static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
{
- struct cfq_data *cfqd = q->elevator.elevator_data;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+ struct cfq_io_context *cic;
+ const int rw = rq_data_dir(rq);
struct cfq_queue *cfqq;
struct cfq_rq *crq;
+ unsigned long flags;
+
+ might_sleep_if(gfp_mask & __GFP_WAIT);
+
+ spin_lock_irqsave(q->queue_lock, flags);
+
+ cfqq = __cfq_get_queue(cfqd, cfq_hash_key(cfqd, current), gfp_mask);
+ if (!cfqq) {
+#if 0
+ cfqq = cfq_get_queue(cfqd, CFQ_KEY_SPARE, gfp_mask);
+ printk("%s: got spare queue\n", current->comm);
+#else
+ goto out_lock;
+#endif
+ }
+
+ if (cfqq->allocated[rw] >= cfqd->max_queued)
+ goto out_lock;
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
/*
- * prepare a queue up front, so cfq_enqueue() doesn't have to
+ * if hashing type has changed, the cfq_queue might change here. we
+ * don't bother rechecking ->allocated since it should be a rare
+ * event
*/
- cfqq = cfq_get_queue(cfqd, current->tgid, gfp_mask);
- if (!cfqq)
- return 1;
+ cic = cfq_get_io_context(&cfqq, gfp_mask);
+ if (!cic)
+ goto err;
crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
if (crq) {
- memset(crq, 0, sizeof(*crq));
RB_CLEAR(&crq->rb_node);
+ crq->rb_key = 0;
crq->request = rq;
- crq->cfq_queue = NULL;
- INIT_LIST_HEAD(&crq->hash);
+ INIT_HLIST_NODE(&crq->hash);
+ crq->cfq_queue = cfqq;
+ crq->io_context = cic;
+ crq->service_start = crq->queue_start = 0;
+ crq->in_flight = crq->accounted = crq->is_sync = 0;
+ crq->is_write = rw;
rq->elevator_private = crq;
+ cfqq->allocated[rw]++;
+ cfqq->alloc_limit[rw] = 0;
return 0;
}
+ put_io_context(cic->ioc);
+err:
+ spin_lock_irqsave(q->queue_lock, flags);
+ cfq_put_queue(cfqq);
+out_lock:
+ spin_unlock_irqrestore(q->queue_lock, flags);
return 1;
}
-static void cfq_exit(request_queue_t *q, elevator_t *e)
+static void cfq_put_cfqd(struct cfq_data *cfqd)
{
- struct cfq_data *cfqd = e->elevator_data;
+ request_queue_t *q = cfqd->queue;
+ elevator_t *e = q->elevator;
+ struct cfq_queue *cfqq;
+
+ if (!atomic_dec_and_test(&cfqd->ref))
+ return;
+
+ /*
+	 * kill spare queue, getting it means we have two references to it.
+ * drop both
+ */
+ spin_lock_irq(q->queue_lock);
+ cfqq = __cfq_get_queue(cfqd, CFQ_KEY_SPARE, GFP_ATOMIC);
+ cfq_put_queue(cfqq);
+ cfq_put_queue(cfqq);
+ spin_unlock_irq(q->queue_lock);
+
+ blk_put_queue(q);
e->elevator_data = NULL;
mempool_destroy(cfqd->crq_pool);
@@ -690,9 +1507,15 @@ static void cfq_exit(request_queue_t *q, elevator_t *e)
kfree(cfqd);
}
-static int cfq_init(request_queue_t *q, elevator_t *e)
+static void cfq_exit_queue(elevator_t *e)
+{
+ cfq_put_cfqd(e->elevator_data);
+}
+
+static int cfq_init_queue(request_queue_t *q, elevator_t *e)
{
struct cfq_data *cfqd;
+ struct cfq_queue *cfqq;
int i;
cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
@@ -701,12 +1524,13 @@ static int cfq_init(request_queue_t *q, elevator_t *e)
memset(cfqd, 0, sizeof(*cfqd));
INIT_LIST_HEAD(&cfqd->rr_list);
+ INIT_LIST_HEAD(&cfqd->empty_list);
- cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
+ cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
if (!cfqd->crq_hash)
goto out_crqhash;
- cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+ cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
if (!cfqd->cfq_hash)
goto out_cfqhash;
@@ -715,25 +1539,44 @@ static int cfq_init(request_queue_t *q, elevator_t *e)
goto out_crqpool;
for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
- INIT_LIST_HEAD(&cfqd->crq_hash[i]);
+ INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
- INIT_LIST_HEAD(&cfqd->cfq_hash[i]);
+ INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
- cfqd->dispatch = &q->queue_head;
e->elevator_data = cfqd;
+
cfqd->queue = q;
+ atomic_inc(&q->refcnt);
+
+ /*
+ * setup spare failure queue
+ */
+ cfqq = cfq_get_queue(cfqd, CFQ_KEY_SPARE, GFP_KERNEL);
+ if (!cfqq)
+ goto out_spare;
/*
* just set it to some high value, we want anyone to be able to queue
* some requests. fairness is handled differently
*/
- cfqd->max_queued = q->nr_requests;
- q->nr_requests = 8192;
+ q->nr_requests = 1024;
+ cfqd->max_queued = q->nr_requests / 16;
+ q->nr_batching = cfq_queued;
+ cfqd->key_type = CFQ_KEY_TGID;
+ cfqd->find_best_crq = 1;
+ atomic_set(&cfqd->ref, 1);
cfqd->cfq_queued = cfq_queued;
cfqd->cfq_quantum = cfq_quantum;
+ cfqd->cfq_fifo_expire_r = cfq_fifo_expire_r;
+ cfqd->cfq_fifo_expire_w = cfq_fifo_expire_w;
+ cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
+ cfqd->cfq_back_max = cfq_back_max;
+ cfqd->cfq_back_penalty = cfq_back_penalty;
return 0;
+out_spare:
+ mempool_destroy(cfqd->crq_pool);
out_crqpool:
kfree(cfqd->cfq_hash);
out_cfqhash:
@@ -743,29 +1586,39 @@ out_crqhash:
return -ENOMEM;
}
+static void cfq_slab_kill(void)
+{
+ if (crq_pool)
+ kmem_cache_destroy(crq_pool);
+ if (cfq_pool)
+ kmem_cache_destroy(cfq_pool);
+ if (cfq_ioc_pool)
+ kmem_cache_destroy(cfq_ioc_pool);
+}
+
static int __init cfq_slab_setup(void)
{
crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
NULL, NULL);
-
if (!crq_pool)
- panic("cfq_iosched: can't init crq pool\n");
+ goto fail;
cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
NULL, NULL);
-
if (!cfq_pool)
- panic("cfq_iosched: can't init cfq pool\n");
+ goto fail;
- cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
-
- if (!cfq_mpool)
- panic("cfq_iosched: can't init cfq mpool\n");
+ cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
+ sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
+ if (!cfq_ioc_pool)
+ goto fail;
return 0;
+fail:
+ cfq_slab_kill();
+ return -ENOMEM;
}
-subsys_initcall(cfq_slab_setup);
/*
* sysfs parts below -->
@@ -791,27 +1644,135 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
return count;
}
-#define SHOW_FUNCTION(__FUNC, __VAR) \
+static ssize_t
+cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
+{
+ max_elapsed_dispatch = max_elapsed_crq = 0;
+ return count;
+}
+
+static ssize_t
+cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
+{
+ spin_lock_irq(cfqd->queue->queue_lock);
+ if (!strncmp(page, "pgid", 4))
+ cfqd->key_type = CFQ_KEY_PGID;
+ else if (!strncmp(page, "tgid", 4))
+ cfqd->key_type = CFQ_KEY_TGID;
+ else if (!strncmp(page, "uid", 3))
+ cfqd->key_type = CFQ_KEY_UID;
+ else if (!strncmp(page, "gid", 3))
+ cfqd->key_type = CFQ_KEY_GID;
+ spin_unlock_irq(cfqd->queue->queue_lock);
+ return count;
+}
+
+static ssize_t
+cfq_read_key_type(struct cfq_data *cfqd, char *page)
+{
+ ssize_t len = 0;
+ int i;
+
+ for (i = CFQ_KEY_PGID; i < CFQ_KEY_LAST; i++) {
+ if (cfqd->key_type == i)
+ len += sprintf(page+len, "[%s] ", cfq_key_types[i]);
+ else
+ len += sprintf(page+len, "%s ", cfq_key_types[i]);
+ }
+ len += sprintf(page+len, "\n");
+ return len;
+}
+
+static ssize_t
+cfq_status_show(struct cfq_data *cfqd, char *page)
+{
+ struct list_head *entry;
+ struct cfq_queue *cfqq;
+ ssize_t len;
+ int i = 0, queues;
+
+ len = sprintf(page, "Busy queues: %u\n", cfqd->busy_queues);
+ len += sprintf(page+len, "key type: %s\n",
+ cfq_key_types[cfqd->key_type]);
+ len += sprintf(page+len, "last sector: %Lu\n",
+ (unsigned long long)cfqd->last_sector);
+ len += sprintf(page+len, "max time in iosched: %lu\n",
+ max_elapsed_dispatch);
+ len += sprintf(page+len, "max completion time: %lu\n", max_elapsed_crq);
+
+ len += sprintf(page+len, "Busy queue list:\n");
+ spin_lock_irq(cfqd->queue->queue_lock);
+ list_for_each(entry, &cfqd->rr_list) {
+ i++;
+ cfqq = list_entry_cfqq(entry);
+ len += sprintf(page+len, " cfqq: key=%lu alloc=%d/%d, "
+ "queued=%d/%d, last_fifo=%lu, service_used=%lu\n",
+ cfqq->key, cfqq->allocated[0], cfqq->allocated[1],
+ cfqq->queued[0], cfqq->queued[1],
+ cfqq->last_fifo_expire, cfqq->service_used);
+ }
+ len += sprintf(page+len, " busy queues total: %d\n", i);
+ queues = i;
+
+ len += sprintf(page+len, "Empty queue list:\n");
+ i = 0;
+ list_for_each(entry, &cfqd->empty_list) {
+ i++;
+ cfqq = list_entry_cfqq(entry);
+ len += sprintf(page+len, " cfqq: key=%lu alloc=%d/%d, "
+ "queued=%d/%d, last_fifo=%lu, service_used=%lu\n",
+ cfqq->key, cfqq->allocated[0], cfqq->allocated[1],
+ cfqq->queued[0], cfqq->queued[1],
+ cfqq->last_fifo_expire, cfqq->service_used);
+ }
+ len += sprintf(page+len, " empty queues total: %d\n", i);
+ queues += i;
+ len += sprintf(page+len, "Total queues: %d\n", queues);
+ spin_unlock_irq(cfqd->queue->queue_lock);
+ return len;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \
{ \
- return cfq_var_show(__VAR, (page)); \
+ unsigned int __data = __VAR; \
+ if (__CONV) \
+ __data = jiffies_to_msecs(__data); \
+ return cfq_var_show(__data, (page)); \
}
-SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum);
-SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
+SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
+SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
+SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r, 1);
+SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w, 1);
+SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire, 1);
+SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
+SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
+SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
#undef SHOW_FUNCTION
-#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \
{ \
- int ret = cfq_var_store(__PTR, (page), count); \
- if (*(__PTR) < (MIN)) \
- *(__PTR) = (MIN); \
- else if (*(__PTR) > (MAX)) \
- *(__PTR) = (MAX); \
+ unsigned int __data; \
+ int ret = cfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ if (__CONV) \
+ *(__PTR) = msecs_to_jiffies(__data); \
+ else \
+ *(__PTR) = __data; \
return ret; \
}
-STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, INT_MAX);
-STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX);
+STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX, 1);
+STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
+STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
+STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
#undef STORE_FUNCTION
static struct cfq_fs_entry cfq_quantum_entry = {
@@ -824,10 +1785,62 @@ static struct cfq_fs_entry cfq_queued_entry = {
.show = cfq_queued_show,
.store = cfq_queued_store,
};
+static struct cfq_fs_entry cfq_fifo_expire_r_entry = {
+ .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_fifo_expire_r_show,
+ .store = cfq_fifo_expire_r_store,
+};
+static struct cfq_fs_entry cfq_fifo_expire_w_entry = {
+ .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_fifo_expire_w_show,
+ .store = cfq_fifo_expire_w_store,
+};
+static struct cfq_fs_entry cfq_fifo_batch_expire_entry = {
+ .attr = {.name = "fifo_batch_expire", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_fifo_batch_expire_show,
+ .store = cfq_fifo_batch_expire_store,
+};
+static struct cfq_fs_entry cfq_find_best_entry = {
+ .attr = {.name = "find_best_crq", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_find_best_show,
+ .store = cfq_find_best_store,
+};
+static struct cfq_fs_entry cfq_back_max_entry = {
+ .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_back_max_show,
+ .store = cfq_back_max_store,
+};
+static struct cfq_fs_entry cfq_back_penalty_entry = {
+ .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_back_penalty_show,
+ .store = cfq_back_penalty_store,
+};
+static struct cfq_fs_entry cfq_clear_elapsed_entry = {
+ .attr = {.name = "clear_elapsed", .mode = S_IWUSR },
+ .store = cfq_clear_elapsed,
+};
+static struct cfq_fs_entry cfq_misc_entry = {
+ .attr = {.name = "show_status", .mode = S_IRUGO },
+ .show = cfq_status_show,
+};
+static struct cfq_fs_entry cfq_key_type_entry = {
+ .attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
+ .show = cfq_read_key_type,
+ .store = cfq_set_key_type,
+};
static struct attribute *default_attrs[] = {
&cfq_quantum_entry.attr,
&cfq_queued_entry.attr,
+ &cfq_fifo_expire_r_entry.attr,
+ &cfq_fifo_expire_w_entry.attr,
+ &cfq_fifo_batch_expire_entry.attr,
+ &cfq_key_type_entry.attr,
+ &cfq_find_best_entry.attr,
+ &cfq_back_max_entry.attr,
+ &cfq_back_penalty_entry.attr,
+ &cfq_clear_elapsed_entry.attr,
+ &cfq_misc_entry.attr,
NULL,
};
@@ -868,23 +1881,56 @@ struct kobj_type cfq_ktype = {
.default_attrs = default_attrs,
};
-elevator_t iosched_cfq = {
- .elevator_name = "cfq",
- .elevator_ktype = &cfq_ktype,
- .elevator_merge_fn = cfq_merge,
- .elevator_merged_fn = cfq_merged_request,
- .elevator_merge_req_fn = cfq_merged_requests,
- .elevator_next_req_fn = cfq_next_request,
- .elevator_add_req_fn = cfq_insert_request,
- .elevator_remove_req_fn = cfq_remove_request,
- .elevator_queue_empty_fn = cfq_queue_empty,
- .elevator_former_req_fn = cfq_former_request,
- .elevator_latter_req_fn = cfq_latter_request,
- .elevator_set_req_fn = cfq_set_request,
- .elevator_put_req_fn = cfq_put_request,
- .elevator_may_queue_fn = cfq_may_queue,
- .elevator_init_fn = cfq_init,
- .elevator_exit_fn = cfq_exit,
+static struct elevator_type iosched_cfq = {
+ .ops = {
+ .elevator_merge_fn = cfq_merge,
+ .elevator_merged_fn = cfq_merged_request,
+ .elevator_merge_req_fn = cfq_merged_requests,
+ .elevator_next_req_fn = cfq_next_request,
+ .elevator_add_req_fn = cfq_insert_request,
+ .elevator_remove_req_fn = cfq_remove_request,
+ .elevator_requeue_req_fn = cfq_requeue_request,
+ .elevator_queue_empty_fn = cfq_queue_empty,
+ .elevator_completed_req_fn = cfq_completed_request,
+ .elevator_former_req_fn = cfq_former_request,
+ .elevator_latter_req_fn = cfq_latter_request,
+ .elevator_set_req_fn = cfq_set_request,
+ .elevator_put_req_fn = cfq_put_request,
+ .elevator_may_queue_fn = cfq_may_queue,
+ .elevator_init_fn = cfq_init_queue,
+ .elevator_exit_fn = cfq_exit_queue,
+ },
+ .elevator_ktype = &cfq_ktype,
+ .elevator_name = "cfq",
+ .elevator_owner = THIS_MODULE,
};
-EXPORT_SYMBOL(iosched_cfq);
+int cfq_init(void)
+{
+ int ret;
+
+ if (cfq_slab_setup())
+ return -ENOMEM;
+
+ ret = elv_register(&iosched_cfq);
+ if (!ret) {
+ __module_get(THIS_MODULE);
+ return 0;
+ }
+
+ cfq_slab_kill();
+ return ret;
+}
+
+void cfq_exit(void)
+{
+ cfq_slab_kill();
+ elv_unregister(&iosched_cfq);
+}
+
+module_init(cfq_init);
+module_exit(cfq_exit);
+
+MODULE_AUTHOR("Jens Axboe");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 204b3182900d..dc896a12283b 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -21,7 +21,6 @@
*/
#include <linux/config.h> /* CONFIG_PROC_FS */
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/bio.h>
@@ -732,7 +731,6 @@ static void __iomem *remap_pci_mem(ulong base, ulong size)
}
#ifndef MODULE
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,13)
/*
* Config string is a comma separated set of i/o addresses of EISA cards.
*/
@@ -749,18 +747,6 @@ static int cpqarray_setup(char *str)
__setup("smart2=", cpqarray_setup);
-#else
-
-/*
- * Copy the contents of the ints[] array passed to us by init.
- */
-void cpqarray_setup(char *str, int *ints)
-{
- int i;
- for(i=0; i<ints[0] && i<8; i++)
- eisa[i] = ints[i+1];
-}
-#endif
#endif
/*
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index fb7ab733c709..f482e8bdb4d6 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -289,7 +289,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
static inline void
deadline_add_request(struct request_queue *q, struct request *rq)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct deadline_rq *drq = RQ_DATA(rq);
const int data_dir = rq_data_dir(drq->request);
@@ -317,7 +317,7 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq)
struct deadline_rq *drq = RQ_DATA(rq);
if (drq) {
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
list_del_init(&drq->fifo);
deadline_remove_merge_hints(q, drq);
@@ -328,7 +328,7 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq)
static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct request *__rq;
int ret;
@@ -383,7 +383,7 @@ out_insert:
static void deadline_merged_request(request_queue_t *q, struct request *req)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct deadline_rq *drq = RQ_DATA(req);
/*
@@ -407,7 +407,7 @@ static void
deadline_merged_requests(request_queue_t *q, struct request *req,
struct request *next)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct deadline_rq *drq = RQ_DATA(req);
struct deadline_rq *dnext = RQ_DATA(next);
@@ -604,7 +604,7 @@ dispatch_request:
static struct request *deadline_next_request(request_queue_t *q)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct request *rq;
/*
@@ -625,7 +625,7 @@ dispatch:
static void
deadline_insert_request(request_queue_t *q, struct request *rq, int where)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
/* barriers must flush the reorder queue */
if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
@@ -653,7 +653,7 @@ deadline_insert_request(request_queue_t *q, struct request *rq, int where)
static int deadline_queue_empty(request_queue_t *q)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
if (!list_empty(&dd->fifo_list[WRITE])
|| !list_empty(&dd->fifo_list[READ])
@@ -687,7 +687,7 @@ deadline_latter_request(request_queue_t *q, struct request *rq)
return NULL;
}
-static void deadline_exit(request_queue_t *q, elevator_t *e)
+static void deadline_exit_queue(elevator_t *e)
{
struct deadline_data *dd = e->elevator_data;
@@ -703,7 +703,7 @@ static void deadline_exit(request_queue_t *q, elevator_t *e)
* initialize elevator private data (deadline_data), and alloc a drq for
* each request on the free lists
*/
-static int deadline_init(request_queue_t *q, elevator_t *e)
+static int deadline_init_queue(request_queue_t *q, elevator_t *e)
{
struct deadline_data *dd;
int i;
@@ -748,7 +748,7 @@ static int deadline_init(request_queue_t *q, elevator_t *e)
static void deadline_put_request(request_queue_t *q, struct request *rq)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct deadline_rq *drq = RQ_DATA(rq);
if (drq) {
@@ -760,7 +760,7 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
static int
deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
{
- struct deadline_data *dd = q->elevator.elevator_data;
+ struct deadline_data *dd = q->elevator->elevator_data;
struct deadline_rq *drq;
drq = mempool_alloc(dd->drq_pool, gfp_mask);
@@ -805,33 +805,41 @@ deadline_var_store(unsigned int *var, const char *page, size_t count)
return count;
}
-#define SHOW_FUNCTION(__FUNC, __VAR) \
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct deadline_data *dd, char *page) \
{ \
- return deadline_var_show(__VAR, (page)); \
-}
-SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ]);
-SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE]);
-SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved);
-SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges);
-SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch);
+ unsigned int __data = __VAR; \
+ if (__CONV) \
+ __data = jiffies_to_msecs(__data); \
+ return deadline_var_show(__data, (page)); \
+}
+SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1);
+SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1);
+SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0);
+SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0);
+SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
-#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \
{ \
- int ret = deadline_var_store(__PTR, (page), count); \
- if (*(__PTR) < (MIN)) \
- *(__PTR) = (MIN); \
- else if (*(__PTR) > (MAX)) \
- *(__PTR) = (MAX); \
+ unsigned int __data; \
+ int ret = deadline_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ if (__CONV) \
+ *(__PTR) = msecs_to_jiffies(__data); \
+ else \
+ *(__PTR) = __data; \
return ret; \
}
-STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX);
-STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX);
-STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX);
-STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1);
-STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX);
+STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
+STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0);
+STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
static struct deadline_fs_entry deadline_readexpire_entry = {
@@ -906,36 +914,54 @@ struct kobj_type deadline_ktype = {
.default_attrs = default_attrs,
};
-static int __init deadline_slab_setup(void)
+static struct elevator_type iosched_deadline = {
+ .ops = {
+ .elevator_merge_fn = deadline_merge,
+ .elevator_merged_fn = deadline_merged_request,
+ .elevator_merge_req_fn = deadline_merged_requests,
+ .elevator_next_req_fn = deadline_next_request,
+ .elevator_add_req_fn = deadline_insert_request,
+ .elevator_remove_req_fn = deadline_remove_request,
+ .elevator_queue_empty_fn = deadline_queue_empty,
+ .elevator_former_req_fn = deadline_former_request,
+ .elevator_latter_req_fn = deadline_latter_request,
+ .elevator_set_req_fn = deadline_set_request,
+ .elevator_put_req_fn = deadline_put_request,
+ .elevator_init_fn = deadline_init_queue,
+ .elevator_exit_fn = deadline_exit_queue,
+ },
+
+ .elevator_ktype = &deadline_ktype,
+ .elevator_name = "deadline",
+ .elevator_owner = THIS_MODULE,
+};
+
+int deadline_init(void)
{
+ int ret;
+
drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
0, 0, NULL, NULL);
if (!drq_pool)
- panic("deadline: can't init slab pool\n");
+ return -ENOMEM;
- return 0;
+ ret = elv_register(&iosched_deadline);
+ if (ret)
+ kmem_cache_destroy(drq_pool);
+
+ return ret;
}
-subsys_initcall(deadline_slab_setup);
-
-elevator_t iosched_deadline = {
- .elevator_merge_fn = deadline_merge,
- .elevator_merged_fn = deadline_merged_request,
- .elevator_merge_req_fn = deadline_merged_requests,
- .elevator_next_req_fn = deadline_next_request,
- .elevator_add_req_fn = deadline_insert_request,
- .elevator_remove_req_fn = deadline_remove_request,
- .elevator_queue_empty_fn = deadline_queue_empty,
- .elevator_former_req_fn = deadline_former_request,
- .elevator_latter_req_fn = deadline_latter_request,
- .elevator_set_req_fn = deadline_set_request,
- .elevator_put_req_fn = deadline_put_request,
- .elevator_init_fn = deadline_init,
- .elevator_exit_fn = deadline_exit,
-
- .elevator_ktype = &deadline_ktype,
- .elevator_name = "deadline",
-};
+void deadline_exit(void)
+{
+ kmem_cache_destroy(drq_pool);
+ elv_unregister(&iosched_deadline);
+}
+
+module_init(deadline_init);
+module_exit(deadline_exit);
-EXPORT_SYMBOL(iosched_deadline);
+MODULE_AUTHOR("Jens Axboe");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("deadline IO scheduler");
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 35c9385ac133..1b4f6a70c0ca 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -37,6 +37,9 @@
#include <asm/uaccess.h>
+static spinlock_t elv_list_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(elv_list);
+
/*
* can we safely merge with this request?
*/
@@ -60,6 +63,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
return 0;
}
+EXPORT_SYMBOL(elv_rq_merge_ok);
inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
@@ -77,6 +81,7 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
return ret;
}
+EXPORT_SYMBOL(elv_try_merge);
inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
{
@@ -85,31 +90,117 @@ inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
return ELEVATOR_NO_MERGE;
}
+EXPORT_SYMBOL(elv_try_last_merge);
-/*
- * general block -> elevator interface starts here
- */
-int elevator_init(request_queue_t *q, elevator_t *type)
+struct elevator_type *elevator_find(const char *name)
+{
+ struct elevator_type *e = NULL;
+ struct list_head *entry;
+
+ spin_lock_irq(&elv_list_lock);
+ list_for_each(entry, &elv_list) {
+ struct elevator_type *__e;
+
+ __e = list_entry(entry, struct elevator_type, list);
+
+ if (!strcmp(__e->elevator_name, name)) {
+ e = __e;
+ break;
+ }
+ }
+ spin_unlock_irq(&elv_list_lock);
+
+ return e;
+}
+
+static int elevator_attach(request_queue_t *q, struct elevator_type *e,
+ struct elevator_queue *eq)
{
- elevator_t *e = &q->elevator;
+ int ret = 0;
- memcpy(e, type, sizeof(*e));
+ if (!try_module_get(e->elevator_owner))
+ return -EINVAL;
+
+ memset(eq, 0, sizeof(*eq));
+ eq->ops = &e->ops;
+ eq->elevator_type = e;
INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
+ q->elevator = eq;
+
+ if (eq->ops->elevator_init_fn)
+ ret = eq->ops->elevator_init_fn(q, eq);
- if (e->elevator_init_fn)
- return e->elevator_init_fn(q, e);
+ return ret;
+}
+
+static char chosen_elevator[16];
+
+static void elevator_setup_default(void)
+{
+ /*
+ * check if default is set and exists
+ */
+ if (chosen_elevator[0] && elevator_find(chosen_elevator))
+ return;
+
+#if defined(CONFIG_IOSCHED_AS)
+ strcpy(chosen_elevator, "anticipatory");
+#elif defined(CONFIG_IOSCHED_DEADLINE)
+ strcpy(chosen_elevator, "deadline");
+#elif defined(CONFIG_IOSCHED_CFQ)
+ strcpy(chosen_elevator, "cfq");
+#elif defined(CONFIG_IOSCHED_NOOP)
+ strcpy(chosen_elevator, "noop");
+#else
+#error "You must build at least 1 IO scheduler into the kernel"
+#endif
+ printk("elevator: using %s as default io scheduler\n", chosen_elevator);
+}
+static int __init elevator_setup(char *str)
+{
+ strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
return 0;
}
-void elevator_exit(request_queue_t *q)
+__setup("elevator=", elevator_setup);
+
+int elevator_init(request_queue_t *q, char *name)
+{
+ struct elevator_type *e = NULL;
+ struct elevator_queue *eq;
+ int ret = 0;
+
+ elevator_setup_default();
+
+ if (!name)
+ name = chosen_elevator;
+
+ e = elevator_find(name);
+ if (!e)
+ return -EINVAL;
+
+ eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL);
+ if (!eq)
+ return -ENOMEM;
+
+ ret = elevator_attach(q, e, eq);
+ if (ret)
+ kfree(eq);
+
+ return ret;
+}
+
+void elevator_exit(elevator_t *e)
{
- elevator_t *e = &q->elevator;
+ if (e->ops->elevator_exit_fn)
+ e->ops->elevator_exit_fn(e);
- if (e->elevator_exit_fn)
- e->elevator_exit_fn(q, e);
+ module_put(e->elevator_type->elevator_owner);
+ e->elevator_type = NULL;
+ kfree(e);
}
int elevator_global_init(void)
@@ -119,32 +210,32 @@ int elevator_global_init(void)
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_merge_fn)
- return e->elevator_merge_fn(q, req, bio);
+ if (e->ops->elevator_merge_fn)
+ return e->ops->elevator_merge_fn(q, req, bio);
return ELEVATOR_NO_MERGE;
}
void elv_merged_request(request_queue_t *q, struct request *rq)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_merged_fn)
- e->elevator_merged_fn(q, rq);
+ if (e->ops->elevator_merged_fn)
+ e->ops->elevator_merged_fn(q, rq);
}
void elv_merge_requests(request_queue_t *q, struct request *rq,
struct request *next)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
if (q->last_merge == next)
q->last_merge = NULL;
- if (e->elevator_merge_req_fn)
- e->elevator_merge_req_fn(q, rq, next);
+ if (e->ops->elevator_merge_req_fn)
+ e->ops->elevator_merge_req_fn(q, rq, next);
}
void elv_requeue_request(request_queue_t *q, struct request *rq)
@@ -160,8 +251,8 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
* if iosched has an explicit requeue hook, then use that. otherwise
* just put the request at the front of the queue
*/
- if (q->elevator.elevator_requeue_req_fn)
- q->elevator.elevator_requeue_req_fn(q, rq);
+ if (q->elevator->ops->elevator_requeue_req_fn)
+ q->elevator->ops->elevator_requeue_req_fn(q, rq);
else
__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
}
@@ -180,7 +271,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
blk_plug_device(q);
rq->q = q;
- q->elevator.elevator_add_req_fn(q, rq, where);
+ q->elevator->ops->elevator_add_req_fn(q, rq, where);
if (blk_queue_plugged(q)) {
int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight;
@@ -203,7 +294,7 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
static inline struct request *__elv_next_request(request_queue_t *q)
{
- return q->elevator.elevator_next_req_fn(q);
+ return q->elevator->ops->elevator_next_req_fn(q);
}
struct request *elv_next_request(request_queue_t *q)
@@ -252,7 +343,7 @@ struct request *elv_next_request(request_queue_t *q)
void elv_remove_request(request_queue_t *q, struct request *rq)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
/*
* the time frame between a request being removed from the lists
@@ -274,16 +365,16 @@ void elv_remove_request(request_queue_t *q, struct request *rq)
if (rq == q->last_merge)
q->last_merge = NULL;
- if (e->elevator_remove_req_fn)
- e->elevator_remove_req_fn(q, rq);
+ if (e->ops->elevator_remove_req_fn)
+ e->ops->elevator_remove_req_fn(q, rq);
}
int elv_queue_empty(request_queue_t *q)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_queue_empty_fn)
- return e->elevator_queue_empty_fn(q);
+ if (e->ops->elevator_queue_empty_fn)
+ return e->ops->elevator_queue_empty_fn(q);
return list_empty(&q->queue_head);
}
@@ -292,10 +383,10 @@ struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
struct list_head *next;
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_latter_req_fn)
- return e->elevator_latter_req_fn(q, rq);
+ if (e->ops->elevator_latter_req_fn)
+ return e->ops->elevator_latter_req_fn(q, rq);
next = rq->queuelist.next;
if (next != &q->queue_head && next != &rq->queuelist)
@@ -308,10 +399,10 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
struct list_head *prev;
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_former_req_fn)
- return e->elevator_former_req_fn(q, rq);
+ if (e->ops->elevator_former_req_fn)
+ return e->ops->elevator_former_req_fn(q, rq);
prev = rq->queuelist.prev;
if (prev != &q->queue_head && prev != &rq->queuelist)
@@ -322,10 +413,10 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_set_req_fn)
- return e->elevator_set_req_fn(q, rq, gfp_mask);
+ if (e->ops->elevator_set_req_fn)
+ return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
rq->elevator_private = NULL;
return 0;
@@ -333,25 +424,25 @@ int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
void elv_put_request(request_queue_t *q, struct request *rq)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_put_req_fn)
- e->elevator_put_req_fn(q, rq);
+ if (e->ops->elevator_put_req_fn)
+ e->ops->elevator_put_req_fn(q, rq);
}
int elv_may_queue(request_queue_t *q, int rw)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
- if (e->elevator_may_queue_fn)
- return e->elevator_may_queue_fn(q, rw);
+ if (e->ops->elevator_may_queue_fn)
+ return e->ops->elevator_may_queue_fn(q, rw);
- return 0;
+ return ELV_MQUEUE_MAY;
}
void elv_completed_request(request_queue_t *q, struct request *rq)
{
- elevator_t *e = &q->elevator;
+ elevator_t *e = q->elevator;
/*
* request is released from the driver, io must be done
@@ -359,22 +450,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
if (blk_account_rq(rq))
q->in_flight--;
- if (e->elevator_completed_req_fn)
- e->elevator_completed_req_fn(q, rq);
+ if (e->ops->elevator_completed_req_fn)
+ e->ops->elevator_completed_req_fn(q, rq);
}
int elv_register_queue(struct request_queue *q)
{
- elevator_t *e;
-
- e = &q->elevator;
+ elevator_t *e = q->elevator;
e->kobj.parent = kobject_get(&q->kobj);
if (!e->kobj.parent)
return -EBUSY;
snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
- e->kobj.ktype = e->elevator_ktype;
+ e->kobj.ktype = e->elevator_type->elevator_ktype;
return kobject_register(&e->kobj);
}
@@ -382,12 +471,131 @@ int elv_register_queue(struct request_queue *q)
void elv_unregister_queue(struct request_queue *q)
{
if (q) {
- elevator_t * e = &q->elevator;
+ elevator_t *e = q->elevator;
kobject_unregister(&e->kobj);
kobject_put(&q->kobj);
}
}
+int elv_register(struct elevator_type *e)
+{
+ if (elevator_find(e->elevator_name))
+ BUG();
+
+ spin_lock_irq(&elv_list_lock);
+ list_add_tail(&e->list, &elv_list);
+ spin_unlock_irq(&elv_list_lock);
+
+ printk("io scheduler %s registered\n", e->elevator_name);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(elv_register);
+
+void elv_unregister(struct elevator_type *e)
+{
+ spin_lock_irq(&elv_list_lock);
+ list_del_init(&e->list);
+ spin_unlock_irq(&elv_list_lock);
+}
+EXPORT_SYMBOL_GPL(elv_unregister);
+
+/*
+ * switch to new_e io scheduler. be careful not to introduce deadlocks -
+ * we don't free the old io scheduler, before we have allocated what we
+ * need for the new one. this way we have a chance of going back to the old
+ * one, if the new one fails init for some reason
+ */
+static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
+{
+ elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
+ elevator_t *old_elevator;
+
+ if (!e) {
+ printk("elevator: out of memory\n");
+ return;
+ }
+
+ blk_wait_queue_drained(q);
+
+ /*
+ * unregister old elevator data
+ */
+ elv_unregister_queue(q);
+ old_elevator = q->elevator;
+
+ /*
+ * attach and start new elevator
+ */
+ if (elevator_attach(q, new_e, e))
+ goto fail;
+
+ if (elv_register_queue(q))
+ goto fail_register;
+
+ /*
+ * finally exit old elevator and start queue again
+ */
+ elevator_exit(old_elevator);
+ blk_finish_queue_drain(q);
+ return;
+
+fail_register:
+ /*
+ * switch failed, exit the new io scheduler and reattach the old
+ * one again (along with re-adding the sysfs dir)
+ */
+ elevator_exit(e);
+fail:
+ q->elevator = old_elevator;
+ elv_register_queue(q);
+ blk_finish_queue_drain(q);
+ printk("elevator: switch to %s failed\n", new_e->elevator_name);
+}
+
+ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
+{
+ char elevator_name[ELV_NAME_MAX];
+ struct elevator_type *e;
+
+ memset(elevator_name, 0, sizeof(elevator_name));
+ strncpy(elevator_name, name, sizeof(elevator_name));
+
+ if (elevator_name[strlen(elevator_name) - 1] == '\n')
+ elevator_name[strlen(elevator_name) - 1] = '\0';
+
+ e = elevator_find(elevator_name);
+ if (!e) {
+ printk("elevator: type %s not found\n", elevator_name);
+ return -EINVAL;
+ }
+
+ elevator_switch(q, e);
+ return count;
+}
+
+ssize_t elv_iosched_show(request_queue_t *q, char *name)
+{
+ elevator_t *e = q->elevator;
+ struct elevator_type *elv = e->elevator_type;
+ struct list_head *entry;
+ int len = 0;
+
+ spin_lock_irq(q->queue_lock);
+ list_for_each(entry, &elv_list) {
+ struct elevator_type *__e;
+
+ __e = list_entry(entry, struct elevator_type, list);
+ if (!strcmp(elv->elevator_name, __e->elevator_name))
+ len += sprintf(name+len, "[%s] ", elv->elevator_name);
+ else
+ len += sprintf(name+len, "%s ", __e->elevator_name);
+ }
+ spin_unlock_irq(q->queue_lock);
+
+ len += sprintf(len+name, "\n");
+ return len;
+}
+
module_init(elevator_global_init);
EXPORT_SYMBOL(elv_add_request);
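With elv_register() and elv_unregister() in place, an io scheduler is now a struct elevator_type that can live in a loadable module. Below is a sketch of the registration boilerplate the converted schedulers follow; the "example" name and the near-empty ops table are placeholders only, and a usable scheduler must also supply at least the add/next request hooks, as the noop conversion further down shows.

/*
 * Sketch of the new registration pattern: an io scheduler is described by
 * a struct elevator_type with an ops table, registered at module load and
 * removed at unload. Names here are illustrative, not from the patch.
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>

static int example_init_queue(request_queue_t *q, elevator_t *e)
{
	e->elevator_data = NULL;	/* per-queue private data would go here */
	return 0;
}

static void example_exit_queue(elevator_t *e)
{
	/* free e->elevator_data here */
}

static struct elevator_type iosched_example = {
	.ops = {
		.elevator_init_fn	= example_init_queue,
		.elevator_exit_fn	= example_exit_queue,
		/* a real scheduler also fills in merge/add/next hooks */
	},
	.elevator_name	= "example",
	.elevator_owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	return elv_register(&iosched_example);
}

static void __exit example_exit(void)
{
	elv_unregister(&iosched_example);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");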
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 26fdf6be6bd0..3ba6430899df 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -243,6 +243,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
blk_queue_hardsect_size(q, 512);
blk_queue_dma_alignment(q, 511);
blk_queue_congestion_threshold(q);
+ q->nr_batching = BLK_BATCH_REQ;
q->unplug_thresh = 4; /* hmm */
q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
@@ -1395,7 +1396,8 @@ void blk_cleanup_queue(request_queue_t * q)
if (!atomic_dec_and_test(&q->refcnt))
return;
- elevator_exit(q);
+ if (q->elevator)
+ elevator_exit(q->elevator);
del_timer_sync(&q->unplug_timer);
kblockd_flush();
@@ -1418,6 +1420,7 @@ static int blk_init_free_list(request_queue_t *q)
rl->count[READ] = rl->count[WRITE] = 0;
init_waitqueue_head(&rl->wait[READ]);
init_waitqueue_head(&rl->wait[WRITE]);
+ init_waitqueue_head(&rl->drain);
rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep);
@@ -1429,45 +1432,6 @@ static int blk_init_free_list(request_queue_t *q)
static int __make_request(request_queue_t *, struct bio *);
-static elevator_t *chosen_elevator =
-#if defined(CONFIG_IOSCHED_AS)
- &iosched_as;
-#elif defined(CONFIG_IOSCHED_DEADLINE)
- &iosched_deadline;
-#elif defined(CONFIG_IOSCHED_CFQ)
- &iosched_cfq;
-#elif defined(CONFIG_IOSCHED_NOOP)
- &elevator_noop;
-#else
- NULL;
-#error "You must have at least 1 I/O scheduler selected"
-#endif
-
-#if defined(CONFIG_IOSCHED_AS) || defined(CONFIG_IOSCHED_DEADLINE) || defined (CONFIG_IOSCHED_NOOP)
-static int __init elevator_setup(char *str)
-{
-#ifdef CONFIG_IOSCHED_DEADLINE
- if (!strcmp(str, "deadline"))
- chosen_elevator = &iosched_deadline;
-#endif
-#ifdef CONFIG_IOSCHED_AS
- if (!strcmp(str, "as"))
- chosen_elevator = &iosched_as;
-#endif
-#ifdef CONFIG_IOSCHED_CFQ
- if (!strcmp(str, "cfq"))
- chosen_elevator = &iosched_cfq;
-#endif
-#ifdef CONFIG_IOSCHED_NOOP
- if (!strcmp(str, "noop"))
- chosen_elevator = &elevator_noop;
-#endif
- return 1;
-}
-
-__setup("elevator=", elevator_setup);
-#endif /* CONFIG_IOSCHED_AS || CONFIG_IOSCHED_DEADLINE || CONFIG_IOSCHED_NOOP */
-
request_queue_t *blk_alloc_queue(int gfp_mask)
{
request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask);
@@ -1520,21 +1484,14 @@ EXPORT_SYMBOL(blk_alloc_queue);
**/
request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
- request_queue_t *q;
- static int printed;
+ request_queue_t *q = blk_alloc_queue(GFP_KERNEL);
- q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return NULL;
if (blk_init_free_list(q))
goto out_init;
- if (!printed) {
- printed = 1;
- printk("Using %s io scheduler\n", chosen_elevator->elevator_name);
- }
-
q->request_fn = rfn;
q->back_merge_fn = ll_back_merge_fn;
q->front_merge_fn = ll_front_merge_fn;
@@ -1555,8 +1512,10 @@ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
/*
* all done
*/
- if (!elevator_init(q, chosen_elevator))
+ if (!elevator_init(q, NULL)) {
+ blk_queue_congestion_threshold(q);
return q;
+ }
blk_cleanup_queue(q);
out_init:
@@ -1584,13 +1543,20 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
mempool_free(rq, q->rq.rq_pool);
}
-static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
+static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
+ int gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
if (!rq)
return NULL;
+ /*
+ * first three bits are identical in rq->flags and bio->bi_rw,
+ * see bio.h and blkdev.h
+ */
+ rq->flags = rw;
+
if (!elv_set_request(q, rq, gfp_mask))
return rq;
@@ -1602,7 +1568,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
* ioc_batching returns true if the ioc is a valid batching request and
* should be given priority access to a request.
*/
-static inline int ioc_batching(struct io_context *ioc)
+static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
{
if (!ioc)
return 0;
@@ -1612,7 +1578,7 @@ static inline int ioc_batching(struct io_context *ioc)
* even if the batch times out, otherwise we could theoretically
* lose wakeups.
*/
- return ioc->nr_batch_requests == BLK_BATCH_REQ ||
+ return ioc->nr_batch_requests == q->nr_batching ||
(ioc->nr_batch_requests > 0
&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}
@@ -1623,12 +1589,12 @@ static inline int ioc_batching(struct io_context *ioc)
* is the behaviour we want though - once it gets a wakeup it should be given
* a nice run.
*/
-void ioc_set_batching(struct io_context *ioc)
+void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
{
- if (!ioc || ioc_batching(ioc))
+ if (!ioc || ioc_batching(q, ioc))
return;
- ioc->nr_batch_requests = BLK_BATCH_REQ;
+ ioc->nr_batch_requests = q->nr_batching;
ioc->last_waited = jiffies;
}
@@ -1644,11 +1610,14 @@ static void freed_request(request_queue_t *q, int rw)
if (rl->count[rw] < queue_congestion_off_threshold(q))
clear_queue_congested(q, rw);
if (rl->count[rw]+1 <= q->nr_requests) {
+ smp_mb();
if (waitqueue_active(&rl->wait[rw]))
wake_up(&rl->wait[rw]);
- if (!waitqueue_active(&rl->wait[rw]))
- blk_clear_queue_full(q, rw);
+ blk_clear_queue_full(q, rw);
}
+ if (unlikely(waitqueue_active(&rl->drain)) &&
+ !rl->count[READ] && !rl->count[WRITE])
+ wake_up(&rl->drain);
}
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1661,6 +1630,9 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
struct request_list *rl = &q->rq;
struct io_context *ioc = get_io_context(gfp_mask);
+ if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
+ return NULL;
+
spin_lock_irq(q->queue_lock);
if (rl->count[rw]+1 >= q->nr_requests) {
/*
@@ -1670,13 +1642,22 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
* will be blocked.
*/
if (!blk_queue_full(q, rw)) {
- ioc_set_batching(ioc);
+ ioc_set_batching(q, ioc);
blk_set_queue_full(q, rw);
}
}
- if (blk_queue_full(q, rw)
- && !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
+ switch (elv_may_queue(q, rw)) {
+ case ELV_MQUEUE_NO:
+ spin_unlock_irq(q->queue_lock);
+ goto out;
+ case ELV_MQUEUE_MAY:
+ break;
+ case ELV_MQUEUE_MUST:
+ goto get_rq;
+ }
+
+ if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
/*
* The queue is full and the allocating process is not a
* "batcher", and not exempted by the IO scheduler
@@ -1685,12 +1666,13 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
goto out;
}
+get_rq:
rl->count[rw]++;
if (rl->count[rw] >= queue_congestion_on_threshold(q))
set_queue_congested(q, rw);
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, gfp_mask);
+ rq = blk_alloc_request(q, rw, gfp_mask);
if (!rq) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -1705,17 +1687,11 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
goto out;
}
- if (ioc_batching(ioc))
+ if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
INIT_LIST_HEAD(&rq->queuelist);
- /*
- * first three bits are identical in rq->flags and bio->bi_rw,
- * see bio.h and blkdev.h
- */
- rq->flags = rw;
-
rq->errors = 0;
rq->rq_status = RQ_ACTIVE;
rq->bio = rq->biotail = NULL;
@@ -1764,7 +1740,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
* See ioc_batching, ioc_set_batching
*/
ioc = get_io_context(GFP_NOIO);
- ioc_set_batching(ioc);
+ ioc_set_batching(q, ioc);
put_io_context(ioc);
}
finish_wait(&rl->wait[rw], &wait);
@@ -2506,6 +2482,70 @@ static inline void blk_partition_remap(struct bio *bio)
}
}
+void blk_finish_queue_drain(request_queue_t *q)
+{
+ struct request_list *rl = &q->rq;
+
+ clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
+ wake_up(&rl->wait[0]);
+ wake_up(&rl->wait[1]);
+ wake_up(&rl->drain);
+}
+
+/*
+ * We rely on the fact that only requests allocated through blk_alloc_request()
+ * have io scheduler private data structures associated with them. Any other
+ * type of request (allocated on stack or through kmalloc()) should not go
+ * to the io scheduler core, but be attached to the queue head instead.
+ */
+void blk_wait_queue_drained(request_queue_t *q)
+{
+ struct request_list *rl = &q->rq;
+ DEFINE_WAIT(wait);
+
+ spin_lock_irq(q->queue_lock);
+ set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
+
+ while (rl->count[READ] || rl->count[WRITE]) {
+ prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (rl->count[READ] || rl->count[WRITE]) {
+ __generic_unplug_device(q);
+ spin_unlock_irq(q->queue_lock);
+ io_schedule();
+ spin_lock_irq(q->queue_lock);
+ }
+
+ finish_wait(&rl->drain, &wait);
+ }
+
+ spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * block while waiting for the io scheduler to be started again.
+ */
+static inline void block_wait_queue_running(request_queue_t *q)
+{
+ DEFINE_WAIT(wait);
+
+ while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
+ struct request_list *rl = &q->rq;
+
+ prepare_to_wait_exclusive(&rl->drain, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ /*
+ * re-check the condition. avoids using prepare_to_wait()
+ * in the fast path (queue is running)
+ */
+ if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
+ io_schedule();
+
+ finish_wait(&rl->drain, &wait);
+ }
+}
+
/**
* generic_make_request: hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -2595,6 +2635,8 @@ end_io:
if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
goto end_io;
+ block_wait_queue_running(q);
+
/*
* If this device has partitions, remap block n
* of partition p to block n+start(p) of the disk.
@@ -3018,6 +3060,7 @@ void kblockd_flush(void)
{
flush_workqueue(kblockd_workqueue);
}
+EXPORT_SYMBOL(kblockd_flush);
int __init blk_dev_init(void)
{
@@ -3036,6 +3079,7 @@ int __init blk_dev_init(void)
blk_max_low_pfn = max_low_pfn;
blk_max_pfn = max_pfn;
+
return 0;
}
@@ -3052,9 +3096,13 @@ void put_io_context(struct io_context *ioc)
if (atomic_dec_and_test(&ioc->refcount)) {
if (ioc->aic && ioc->aic->dtor)
ioc->aic->dtor(ioc->aic);
+ if (ioc->cic && ioc->cic->dtor)
+ ioc->cic->dtor(ioc->cic);
+
kmem_cache_free(iocontext_cachep, ioc);
}
}
+EXPORT_SYMBOL(put_io_context);
/* Called by the exiting task */
void exit_io_context(void)
@@ -3064,14 +3112,15 @@ void exit_io_context(void)
local_irq_save(flags);
ioc = current->io_context;
- if (ioc) {
- if (ioc->aic && ioc->aic->exit)
- ioc->aic->exit(ioc->aic);
- put_io_context(ioc);
- current->io_context = NULL;
- } else
- WARN_ON(1);
+ current->io_context = NULL;
local_irq_restore(flags);
+
+ if (ioc->aic && ioc->aic->exit)
+ ioc->aic->exit(ioc->aic);
+ if (ioc->cic && ioc->cic->exit)
+ ioc->cic->exit(ioc->cic);
+
+ put_io_context(ioc);
}
/*
@@ -3090,22 +3139,42 @@ struct io_context *get_io_context(int gfp_flags)
local_irq_save(flags);
ret = tsk->io_context;
- if (ret == NULL) {
- ret = kmem_cache_alloc(iocontext_cachep, GFP_ATOMIC);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- ret->pid = tsk->pid;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
+ if (ret)
+ goto out;
+
+ local_irq_restore(flags);
+
+ ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+ if (ret) {
+ atomic_set(&ret->refcount, 1);
+ ret->pid = tsk->pid;
+ ret->last_waited = jiffies; /* doesn't matter... */
+ ret->nr_batch_requests = 0; /* because this is 0 */
+ ret->aic = NULL;
+ ret->cic = NULL;
+ spin_lock_init(&ret->lock);
+
+ local_irq_save(flags);
+
+ /*
+ * very unlikely, someone raced with us in setting up the task
+ * io context. free new context and just grab a reference.
+ */
+ if (!tsk->io_context)
tsk->io_context = ret;
+ else {
+ kmem_cache_free(iocontext_cachep, ret);
+ ret = tsk->io_context;
}
- }
- if (ret)
+
+out:
atomic_inc(&ret->refcount);
- local_irq_restore(flags);
+ local_irq_restore(flags);
+ }
+
return ret;
}
+EXPORT_SYMBOL(get_io_context);
void copy_io_context(struct io_context **pdst, struct io_context **psrc)
{
@@ -3119,6 +3188,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
*pdst = src;
}
}
+EXPORT_SYMBOL(copy_io_context);
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
@@ -3127,7 +3197,7 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
*ioc1 = *ioc2;
*ioc2 = temp;
}
-
+EXPORT_SYMBOL(swap_io_context);
/*
* sysfs parts below
@@ -3285,11 +3355,18 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.show = queue_max_hw_sectors_show,
};
+static struct queue_sysfs_entry queue_iosched_entry = {
+ .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+ .show = elv_iosched_show,
+ .store = elv_iosched_store,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr,
+ &queue_iosched_entry.attr,
NULL,
};
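get_request() above now consults elv_may_queue() through a three-way return code rather than a plain boolean: ELV_MQUEUE_MUST skips the queue-full throttling entirely, ELV_MQUEUE_NO refuses the allocation, and ELV_MQUEUE_MAY falls through to the normal request-list and batching limits. A sketch of a may_queue hook using that contract is below; the policy it implements is purely illustrative and not taken from any in-tree scheduler, and the function would be wired up as .elevator_may_queue_fn in an elevator_type's ops table.

#include <linux/blkdev.h>
#include <linux/elevator.h>

/*
 * Illustrative only: always let writes through, and let the request list
 * limits decide for everything else.
 */
static int example_may_queue(request_queue_t *q, int rw)
{
	if (rw == WRITE)
		return ELV_MQUEUE_MUST;	/* bypass blk_queue_full() throttling */

	return ELV_MQUEUE_MAY;		/* normal batching rules apply */
}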
diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c
index ffef40be1f92..707dddd7d881 100644
--- a/drivers/block/noop-iosched.c
+++ b/drivers/block/noop-iosched.c
@@ -83,12 +83,31 @@ struct request *elevator_noop_next_request(request_queue_t *q)
return NULL;
}
-elevator_t elevator_noop = {
- .elevator_merge_fn = elevator_noop_merge,
- .elevator_merge_req_fn = elevator_noop_merge_requests,
- .elevator_next_req_fn = elevator_noop_next_request,
- .elevator_add_req_fn = elevator_noop_add_request,
- .elevator_name = "noop",
+static struct elevator_type elevator_noop = {
+ .ops = {
+ .elevator_merge_fn = elevator_noop_merge,
+ .elevator_merge_req_fn = elevator_noop_merge_requests,
+ .elevator_next_req_fn = elevator_noop_next_request,
+ .elevator_add_req_fn = elevator_noop_add_request,
+ },
+ .elevator_name = "noop",
+ .elevator_owner = THIS_MODULE,
};
-EXPORT_SYMBOL(elevator_noop);
+int noop_init(void)
+{
+ return elv_register(&elevator_noop);
+}
+
+void noop_exit(void)
+{
+ elv_unregister(&elevator_noop);
+}
+
+module_init(noop_init);
+module_exit(noop_exit);
+
+
+MODULE_AUTHOR("Jens Axboe");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("No-op IO scheduler");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
new file mode 100644
index 000000000000..fb80b6a91f84
--- /dev/null
+++ b/drivers/block/pktcdvd.c
@@ -0,0 +1,2679 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices (aka an exercise in block layer masturbation)
+ *
+ *
+ * TODO: (circa order of when I will fix it)
+ * - Only able to write on CD-RW media right now.
+ * - check host application code on media and set it in write page
+ * - interface for UDF <-> packet to negotiate a new location when a write
+ * fails.
+ * - handle OPC, especially for -RW media
+ *
+ * Theory of operation:
+ *
+ * We use a custom make_request_fn function that forwards reads directly to
+ * the underlying CD device. Write requests are either attached directly to
+ * a live packet_data object, or simply stored sequentially in a list for
+ * later processing by the kcdrwd kernel thread. This driver doesn't use
+ * any elevator functionality as defined by the elevator_s struct, but the
+ * underlying CD device uses a standard elevator.
+ *
+ * This strategy makes it possible to do very late merging of IO requests.
+ * A new bio sent to pkt_make_request can be merged with a live packet_data
+ * object even if the object is in the data gathering state.
+ *
+ *************************************************************************/
+
+#define VERSION_CODE "v0.2.0a 2004-07-14 Jens Axboe (axboe@suse.de) and petero2@telia.com"
+
+#include <linux/pktcdvd.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/suspend.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_ioctl.h>
+
+#include <asm/uaccess.h>
+
+#if PACKET_DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#if PACKET_DEBUG > 1
+#define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
+#else
+#define VPRINTK(fmt, args...)
+#endif
+
+#define MAX_SPEED 0xffff
+
+#define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1))
+
+static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
+static struct proc_dir_entry *pkt_proc;
+static int pkt_major;
+static struct semaphore ctl_mutex; /* Serialize open/close/setup/teardown */
+static mempool_t *psd_pool;
+
+
+static void pkt_bio_finished(struct pktcdvd_device *pd)
+{
+ BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
+ if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
+ VPRINTK("pktcdvd: queue empty\n");
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+ }
+}
+
+static void pkt_bio_destructor(struct bio *bio)
+{
+ kfree(bio->bi_io_vec);
+ kfree(bio);
+}
+
+static struct bio *pkt_bio_alloc(int nr_iovecs)
+{
+ struct bio_vec *bvl = NULL;
+ struct bio *bio;
+
+ bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
+ if (!bio)
+ goto no_bio;
+ bio_init(bio);
+
+ bvl = kmalloc(nr_iovecs * sizeof(struct bio_vec), GFP_KERNEL);
+ if (!bvl)
+ goto no_bvl;
+ memset(bvl, 0, nr_iovecs * sizeof(struct bio_vec));
+
+ bio->bi_max_vecs = nr_iovecs;
+ bio->bi_io_vec = bvl;
+ bio->bi_destructor = pkt_bio_destructor;
+
+ return bio;
+
+ no_bvl:
+ kfree(bio);
+ no_bio:
+ return NULL;
+}
+
+/*
+ * Allocate a packet_data struct
+ */
+static struct packet_data *pkt_alloc_packet_data(void)
+{
+ int i;
+ struct packet_data *pkt;
+
+ pkt = kmalloc(sizeof(struct packet_data), GFP_KERNEL);
+ if (!pkt)
+ goto no_pkt;
+ memset(pkt, 0, sizeof(struct packet_data));
+
+ pkt->w_bio = pkt_bio_alloc(PACKET_MAX_SIZE);
+ if (!pkt->w_bio)
+ goto no_bio;
+
+ for (i = 0; i < PAGES_PER_PACKET; i++) {
+ pkt->pages[i] = alloc_page(GFP_KERNEL);
+ if (!pkt->pages[i])
+ goto no_page;
+ }
+ for (i = 0; i < PAGES_PER_PACKET; i++)
+ clear_page(page_address(pkt->pages[i]));
+
+ spin_lock_init(&pkt->lock);
+
+ for (i = 0; i < PACKET_MAX_SIZE; i++) {
+ struct bio *bio = pkt_bio_alloc(1);
+ if (!bio)
+ goto no_rd_bio;
+ pkt->r_bios[i] = bio;
+ }
+
+ return pkt;
+
+no_rd_bio:
+ for (i = 0; i < PACKET_MAX_SIZE; i++) {
+ struct bio *bio = pkt->r_bios[i];
+ if (bio)
+ bio_put(bio);
+ }
+
+no_page:
+ for (i = 0; i < PAGES_PER_PACKET; i++)
+ if (pkt->pages[i])
+ __free_page(pkt->pages[i]);
+ bio_put(pkt->w_bio);
+no_bio:
+ kfree(pkt);
+no_pkt:
+ return NULL;
+}
+
+/*
+ * Free a packet_data struct
+ */
+static void pkt_free_packet_data(struct packet_data *pkt)
+{
+ int i;
+
+ for (i = 0; i < PACKET_MAX_SIZE; i++) {
+ struct bio *bio = pkt->r_bios[i];
+ if (bio)
+ bio_put(bio);
+ }
+ for (i = 0; i < PAGES_PER_PACKET; i++)
+ __free_page(pkt->pages[i]);
+ bio_put(pkt->w_bio);
+ kfree(pkt);
+}
+
+static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
+
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
+ pkt_free_packet_data(pkt);
+ }
+}
+
+static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
+{
+ struct packet_data *pkt;
+
+ INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+ INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+ spin_lock_init(&pd->cdrw.active_list_lock);
+ while (nr_packets > 0) {
+ pkt = pkt_alloc_packet_data();
+ if (!pkt) {
+ pkt_shrink_pktlist(pd);
+ return 0;
+ }
+ pkt->id = nr_packets;
+ pkt->pd = pd;
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ nr_packets--;
+ }
+ return 1;
+}
+
+static void *pkt_rb_alloc(int gfp_mask, void *data)
+{
+ return kmalloc(sizeof(struct pkt_rb_node), gfp_mask);
+}
+
+static void pkt_rb_free(void *ptr, void *data)
+{
+ kfree(ptr);
+}
+
+static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
+{
+ struct rb_node *n = rb_next(&node->rb_node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct pkt_rb_node, rb_node);
+}
+
+static inline void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ rb_erase(&node->rb_node, &pd->bio_queue);
+ mempool_free(node, pd->rb_pool);
+ pd->bio_queue_size--;
+ BUG_ON(pd->bio_queue_size < 0);
+}
+
+/*
+ * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
+ */
+static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
+{
+ struct rb_node *n = pd->bio_queue.rb_node;
+ struct rb_node *next;
+ struct pkt_rb_node *tmp;
+
+ if (!n) {
+ BUG_ON(pd->bio_queue_size > 0);
+ return NULL;
+ }
+
+ for (;;) {
+ tmp = rb_entry(n, struct pkt_rb_node, rb_node);
+ if (s <= tmp->bio->bi_sector)
+ next = n->rb_left;
+ else
+ next = n->rb_right;
+ if (!next)
+ break;
+ n = next;
+ }
+
+ if (s > tmp->bio->bi_sector) {
+ tmp = pkt_rbtree_next(tmp);
+ if (!tmp)
+ return NULL;
+ }
+ BUG_ON(s > tmp->bio->bi_sector);
+ return tmp;
+}
+
+/*
+ * Insert a node into the pd->bio_queue rb tree.
+ */
+static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ struct rb_node **p = &pd->bio_queue.rb_node;
+ struct rb_node *parent = NULL;
+ sector_t s = node->bio->bi_sector;
+ struct pkt_rb_node *tmp;
+
+ while (*p) {
+ parent = *p;
+ tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
+ if (s < tmp->bio->bi_sector)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color(&node->rb_node, &pd->bio_queue);
+ pd->bio_queue_size++;
+}
+
+/*
+ * Add a bio to a singly linked list defined by its head and tail pointers.
+ */
+static inline void pkt_add_list_last(struct bio *bio, struct bio **list_head, struct bio **list_tail)
+{
+ bio->bi_next = NULL;
+ if (*list_tail) {
+ BUG_ON((*list_head) == NULL);
+ (*list_tail)->bi_next = bio;
+ (*list_tail) = bio;
+ } else {
+ BUG_ON((*list_head) != NULL);
+ (*list_head) = bio;
+ (*list_tail) = bio;
+ }
+}
+
+/*
+ * Remove and return the first bio from a singly linked list defined by its
+ * head and tail pointers.
+ */
+static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio **list_tail)
+{
+ struct bio *bio;
+
+ if (*list_head == NULL)
+ return NULL;
+
+ bio = *list_head;
+ *list_head = bio->bi_next;
+ if (*list_head == NULL)
+ *list_tail = NULL;
+
+ bio->bi_next = NULL;
+ return bio;
+}
+
+/*
+ * Send a packet_command to the underlying block device and
+ * wait for completion.
+ */
+static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ char sense[SCSI_SENSE_BUFFERSIZE];
+ request_queue_t *q;
+ struct request *rq;
+ DECLARE_COMPLETION(wait);
+ int err = 0;
+
+ q = bdev_get_queue(pd->bdev);
+
+ rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ,
+ __GFP_WAIT);
+ rq->errors = 0;
+ rq->rq_disk = pd->bdev->bd_disk;
+ rq->bio = NULL;
+ rq->buffer = NULL;
+ rq->timeout = 60*HZ;
+ rq->data = cgc->buffer;
+ rq->data_len = cgc->buflen;
+ rq->sense = sense;
+ memset(sense, 0, sizeof(sense));
+ rq->sense_len = 0;
+ rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+ if (cgc->quiet)
+ rq->flags |= REQ_QUIET;
+ memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+ if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
+ memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
+
+ rq->ref_count++;
+ rq->flags |= REQ_NOMERGE;
+ rq->waiting = &wait;
+ elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+ generic_unplug_device(q);
+ wait_for_completion(&wait);
+
+ if (rq->errors)
+ err = -EIO;
+
+ blk_put_request(rq);
+ return err;
+}
+
+/*
+ * A generic sense dump / resolve mechanism should be implemented across
+ * all ATAPI + SCSI devices.
+ */
+static void pkt_dump_sense(struct packet_command *cgc)
+{
+ static char *info[9] = { "No sense", "Recovered error", "Not ready",
+ "Medium error", "Hardware error", "Illegal request",
+ "Unit attention", "Data protect", "Blank check" };
+ int i;
+ struct request_sense *sense = cgc->sense;
+
+ printk("pktcdvd:");
+ for (i = 0; i < CDROM_PACKET_SIZE; i++)
+ printk(" %02x", cgc->cmd[i]);
+ printk(" - ");
+
+ if (sense == NULL) {
+ printk("no sense\n");
+ return;
+ }
+
+ printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq);
+
+ if (sense->sense_key > 8) {
+ printk(" (INVALID)\n");
+ return;
+ }
+
+ printk(" (%s)\n", info[sense->sense_key]);
+}
+
+/*
+ * flush the drive cache to media
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+ cgc.quiet = 1;
+
+ /*
+ * the IMMED bit -- we default to not setting it, although that
+ * would allow a much faster close, this is safer
+ */
+#if 0
+ cgc.cmd[1] = 1 << 1;
+#endif
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ */
+static int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsigned read_speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ int ret;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_SET_SPEED;
+ cgc.cmd[2] = (read_speed >> 8) & 0xff;
+ cgc.cmd[3] = read_speed & 0xff;
+ cgc.cmd[4] = (write_speed >> 8) & 0xff;
+ cgc.cmd[5] = write_speed & 0xff;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ pkt_dump_sense(&cgc);
+
+ return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_prio_read)
+{
+ spin_lock(&pd->iosched.lock);
+ if (bio_data_dir(bio) == READ) {
+ pkt_add_list_last(bio, &pd->iosched.read_queue,
+ &pd->iosched.read_queue_tail);
+ if (high_prio_read)
+ pd->iosched.high_prio_read = 1;
+ } else {
+ pkt_add_list_last(bio, &pd->iosched.write_queue,
+ &pd->iosched.write_queue_tail);
+ }
+ spin_unlock(&pd->iosched.lock);
+
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests. This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ * previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ * than necessary.
+ * - Set the read speed according to current usage pattern. When only reading
+ * from the device, it's best to use the highest possible read speed, but
+ * when switching often between reading and writing, it's better to have the
+ * same read and write speeds.
+ * - Reads originating from user space should have higher priority than reads
+ * originating from pkt_gather_data, because some process is usually waiting
+ * on reads of the first kind.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+ request_queue_t *q;
+
+ if (atomic_read(&pd->iosched.attention) == 0)
+ return;
+ atomic_set(&pd->iosched.attention, 0);
+
+ q = bdev_get_queue(pd->bdev);
+
+ for (;;) {
+ struct bio *bio;
+ int reads_queued, writes_queued, high_prio_read;
+
+ spin_lock(&pd->iosched.lock);
+ reads_queued = (pd->iosched.read_queue != NULL);
+ writes_queued = (pd->iosched.write_queue != NULL);
+ if (!reads_queued)
+ pd->iosched.high_prio_read = 0;
+ high_prio_read = pd->iosched.high_prio_read;
+ spin_unlock(&pd->iosched.lock);
+
+ if (!reads_queued && !writes_queued)
+ break;
+
+ if (pd->iosched.writing) {
+ if (high_prio_read || (!writes_queued && reads_queued)) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ VPRINTK("pktcdvd: write, waiting\n");
+ break;
+ }
+ pkt_flush_cache(pd);
+ pd->iosched.writing = 0;
+ }
+ } else {
+ if (!reads_queued && writes_queued) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ VPRINTK("pktcdvd: read, waiting\n");
+ break;
+ }
+ pd->iosched.writing = 1;
+ }
+ }
+
+ spin_lock(&pd->iosched.lock);
+ if (pd->iosched.writing) {
+ bio = pkt_get_list_first(&pd->iosched.write_queue,
+ &pd->iosched.write_queue_tail);
+ } else {
+ bio = pkt_get_list_first(&pd->iosched.read_queue,
+ &pd->iosched.read_queue_tail);
+ }
+ spin_unlock(&pd->iosched.lock);
+
+ if (!bio)
+ continue;
+
+ if (bio_data_dir(bio) == READ)
+ pd->iosched.successive_reads += bio->bi_size >> 10;
+ else
+ pd->iosched.successive_reads = 0;
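+		/*
+		 * After HI_SPEED_SWITCH kilobytes of uninterrupted reading,
+		 * switch to the maximum read speed; drop back to the write
+		 * speed as soon as a write breaks the read stream.
+		 */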
+ if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+ if (pd->read_speed == pd->write_speed) {
+ pd->read_speed = MAX_SPEED;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ } else {
+ if (pd->read_speed != pd->write_speed) {
+ pd->read_speed = pd->write_speed;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ }
+
+ atomic_inc(&pd->cdrw.pending_bios);
+ generic_make_request(bio);
+ }
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, request_queue_t *q)
+{
+ if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) {
+ /*
+ * The cdrom device can handle one segment/frame
+ */
+ clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) {
+ /*
+ * We can handle this case at the expense of some extra memory
+ * copies during write operations
+ */
+ set_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else {
+ printk("pktcdvd: cdrom max_phys_segments too small\n");
+ return -EIO;
+ }
+}
+
+/*
+ * Copy CD_FRAMESIZE bytes from src_bio into a destination page
+ */
+static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs,
+ struct page *dst_page, int dst_offs)
+{
+ unsigned int copy_size = CD_FRAMESIZE;
+
+ while (copy_size > 0) {
+ struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
+ void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) +
+ src_bvl->bv_offset + offs;
+ void *vto = page_address(dst_page) + dst_offs;
+ int len = min_t(int, copy_size, src_bvl->bv_len - offs);
+
+ BUG_ON(len < 0);
+ memcpy(vto, vfrom, len);
+ kunmap_atomic(src_bvl->bv_page, KM_USER0);
+
+ seg++;
+ offs = 0;
+ dst_offs += len;
+ copy_size -= len;
+ }
+}
+
+/*
+ * Copy all data for this packet to pkt->pages[], so that
+ * a) The number of required segments for the write bio is minimized, which
+ * is necessary for some scsi controllers.
+ * b) The data can be used as cache to avoid read requests if we receive a
+ * new write request for the same zone.
+ */
+static void pkt_make_local_copy(struct packet_data *pkt, struct page **pages, int *offsets)
+{
+ int f, p, offs;
+
+ /* Copy all data to pkt->pages[] */
+ p = 0;
+ offs = 0;
+ for (f = 0; f < pkt->frames; f++) {
+ if (pages[f] != pkt->pages[p]) {
+ void *vfrom = kmap_atomic(pages[f], KM_USER0) + offsets[f];
+ void *vto = page_address(pkt->pages[p]) + offs;
+ memcpy(vto, vfrom, CD_FRAMESIZE);
+ kunmap_atomic(pages[f], KM_USER0);
+ pages[f] = pkt->pages[p];
+ offsets[f] = offs;
+ } else {
+ BUG_ON(offsets[f] != offs);
+ }
+ offs += CD_FRAMESIZE;
+ if (offs >= PAGE_SIZE) {
+ BUG_ON(offs > PAGE_SIZE);
+ offs = 0;
+ p++;
+ }
+ }
+}
+
+static int pkt_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
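+	/* partial completion -- the bio is not finished yet */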
+ if (bio->bi_size)
+ return 1;
+
+ VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio,
+ (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err);
+
+ if (err)
+ atomic_inc(&pkt->io_errors);
+ if (atomic_dec_and_test(&pkt->io_wait)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ pkt_bio_finished(pd);
+
+ return 0;
+}
+
+static int pkt_end_io_packet_write(struct bio *bio, unsigned int bytes_done, int err)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
+ if (bio->bi_size)
+ return 1;
+
+ VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err);
+
+ pd->stats.pkt_ended++;
+
+ pkt_bio_finished(pd);
+ atomic_dec(&pkt->io_wait);
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ return 0;
+}
+
+/*
+ * Schedule reads for the holes in a packet
+ */
+static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int frames_read = 0;
+ struct bio *bio;
+ int f;
+ char written[PACKET_MAX_SIZE];
+
+ BUG_ON(!pkt->orig_bios);
+
+ atomic_set(&pkt->io_wait, 0);
+ atomic_set(&pkt->io_errors, 0);
+
+ if (pkt->cache_valid) {
+ VPRINTK("pkt_gather_data: zone %llx cached\n",
+ (unsigned long long)pkt->sector);
+ goto out_account;
+ }
+
+ /*
+ * Figure out which frames we need to read before we can write.
+ */
+ memset(written, 0, sizeof(written));
+ spin_lock(&pkt->lock);
+ for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
+ int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_size / CD_FRAMESIZE;
+ BUG_ON(first_frame < 0);
+ BUG_ON(first_frame + num_frames > pkt->frames);
+ for (f = first_frame; f < first_frame + num_frames; f++)
+ written[f] = 1;
+ }
+ spin_unlock(&pkt->lock);
+
+ /*
+ * Schedule reads for missing parts of the packet.
+ */
+ for (f = 0; f < pkt->frames; f++) {
+ int p, offset;
+ if (written[f])
+ continue;
+ bio = pkt->r_bios[f];
+ bio_init(bio);
+ bio->bi_max_vecs = 1;
+ bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+ bio->bi_bdev = pd->bdev;
+ bio->bi_end_io = pkt_end_io_read;
+ bio->bi_private = pkt;
+
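+		/*
+		 * Frame f starts at byte offset f * CD_FRAMESIZE within the
+		 * packet's page array; work out the page and page offset.
+		 */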
+ p = (f * CD_FRAMESIZE) / PAGE_SIZE;
+ offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n",
+ f, pkt->pages[p], offset);
+ if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
+ BUG();
+
+ atomic_inc(&pkt->io_wait);
+ bio->bi_rw = READ;
+ pkt_queue_bio(pd, bio, 0);
+ frames_read++;
+ }
+
+out_account:
+ VPRINTK("pkt_gather_data: need %d frames for zone %llx\n",
+ frames_read, (unsigned long long)pkt->sector);
+ pd->stats.pkt_started++;
+ pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
+ pd->stats.secs_w += pd->settings.size;
+}
+
+/*
+ * Find a packet matching zone, or the least recently used packet if
+ * there is no match.
+ */
+static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
+{
+ struct packet_data *pkt;
+
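+	/*
+	 * Packets with valid cached data are kept at the head of the free
+	 * list (see pkt_put_packet_data), so if no packet matches the zone
+	 * we reuse the tail entry, whose cache is least worth keeping.
+	 */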
+ list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
+ if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
+ list_del_init(&pkt->list);
+ if (pkt->sector != zone)
+ pkt->cache_valid = 0;
+ break;
+ }
+ }
+ return pkt;
+}
+
+static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ if (pkt->cache_valid) {
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ } else {
+ list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
+ }
+}
+
+/*
+ * recover a failed write, query for relocation if possible
+ *
+ * returns 1 if recovery is possible, or 0 if not
+ *
+ */
+static int pkt_start_recovery(struct packet_data *pkt)
+{
+ /*
+ * FIXME. We need help from the file system to implement
+ * recovery handling.
+ */
+ return 0;
+#if 0
+ struct request *rq = pkt->rq;
+ struct pktcdvd_device *pd = rq->rq_disk->private_data;
+ struct block_device *pkt_bdev;
+ struct super_block *sb = NULL;
+ unsigned long old_block, new_block;
+ sector_t new_sector;
+
+ pkt_bdev = bdget(kdev_t_to_nr(pd->pkt_dev));
+ if (pkt_bdev) {
+ sb = get_super(pkt_bdev);
+ bdput(pkt_bdev);
+ }
+
+ if (!sb)
+ return 0;
+
+ if (!sb->s_op || !sb->s_op->relocate_blocks)
+ goto out;
+
+ old_block = pkt->sector / (CD_FRAMESIZE >> 9);
+ if (sb->s_op->relocate_blocks(sb, old_block, &new_block))
+ goto out;
+
+ new_sector = new_block * (CD_FRAMESIZE >> 9);
+ pkt->sector = new_sector;
+
+ pkt->bio->bi_sector = new_sector;
+ pkt->bio->bi_next = NULL;
+ pkt->bio->bi_flags = 1 << BIO_UPTODATE;
+ pkt->bio->bi_idx = 0;
+
+ BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW));
+ BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
+ BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
+ BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
+ BUG_ON(pkt->bio->bi_private != pkt);
+
+ drop_super(sb);
+ return 1;
+
+out:
+ drop_super(sb);
+ return 0;
+#endif
+}
+
+static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+{
+#if PACKET_DEBUG > 1
+ static const char *state_name[] = {
+ "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
+ };
+ enum packet_data_state old_state = pkt->state;
+ VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector,
+ state_name[old_state], state_name[state]);
+#endif
+ pkt->state = state;
+}
+
+/*
+ * Scan the work queue to see if we can start a new packet.
+ * returns non-zero if any work was done.
+ */
+static int pkt_handle_queue(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *p;
+ struct bio *bio = NULL;
+ sector_t zone = 0; /* Suppress gcc warning */
+ struct pkt_rb_node *node, *first_node;
+ struct rb_node *n;
+
+ VPRINTK("handle_queue\n");
+
+ atomic_set(&pd->scan_queue, 0);
+
+ if (list_empty(&pd->cdrw.pkt_free_list)) {
+ VPRINTK("handle_queue: no pkt\n");
+ return 0;
+ }
+
+ /*
+ * Try to find a zone we are not already working on.
+ */
+ spin_lock(&pd->lock);
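+	/*
+	 * Start scanning at the first zone at or after pd->current_sector
+	 * and wrap around, so zones are picked in roughly ascending order.
+	 */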
+ first_node = pkt_rbtree_find(pd, pd->current_sector);
+ if (!first_node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ first_node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ node = first_node;
+ while (node) {
+ bio = node->bio;
+ zone = ZONE(bio->bi_sector, pd);
+ list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
+ if (p->sector == zone)
+ goto try_next_bio;
+ }
+ break;
+try_next_bio:
+ node = pkt_rbtree_next(node);
+ if (!node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ if (node == first_node)
+ node = NULL;
+ }
+ spin_unlock(&pd->lock);
+ if (!bio) {
+ VPRINTK("handle_queue: no bio\n");
+ return 0;
+ }
+
+ pkt = pkt_get_packet_data(pd, zone);
+ BUG_ON(!pkt);
+
+ pd->current_sector = zone + pd->settings.size;
+ pkt->sector = zone;
+ pkt->frames = pd->settings.size >> 2;
+ BUG_ON(pkt->frames > PACKET_MAX_SIZE);
+ pkt->write_size = 0;
+
+ /*
+ * Scan work queue for bios in the same zone and link them
+ * to this packet.
+ */
+ spin_lock(&pd->lock);
+ VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone);
+ while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+ bio = node->bio;
+ VPRINTK("pkt_handle_queue: found zone=%llx\n",
+ (unsigned long long)ZONE(bio->bi_sector, pd));
+ if (ZONE(bio->bi_sector, pd) != zone)
+ break;
+ pkt_rbtree_erase(pd, node);
+ spin_lock(&pkt->lock);
+ pkt_add_list_last(bio, &pkt->orig_bios, &pkt->orig_bios_tail);
+ pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ spin_unlock(&pkt->lock);
+ }
+ spin_unlock(&pd->lock);
+
+ pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
+ pkt_set_state(pkt, PACKET_WAITING_STATE);
+ atomic_set(&pkt->run_sm, 1);
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_add(&pkt->list, &pd->cdrw.pkt_active_list);
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ return 1;
+}
+
+/*
+ * Assemble a bio to write one packet and queue the bio for processing
+ * by the underlying block device.
+ */
+static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ struct bio *bio;
+ struct page *pages[PACKET_MAX_SIZE];
+ int offsets[PACKET_MAX_SIZE];
+ int f;
+ int frames_write;
+
+ for (f = 0; f < pkt->frames; f++) {
+ pages[f] = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
+ offsets[f] = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ }
+
+ /*
+ * Fill-in pages[] and offsets[] with data from orig_bios.
+ */
+ frames_write = 0;
+ spin_lock(&pkt->lock);
+ for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
+ int segment = bio->bi_idx;
+ int src_offs = 0;
+ int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_size / CD_FRAMESIZE;
+ BUG_ON(first_frame < 0);
+ BUG_ON(first_frame + num_frames > pkt->frames);
+ for (f = first_frame; f < first_frame + num_frames; f++) {
+ struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
+
+ while (src_offs >= src_bvl->bv_len) {
+ src_offs -= src_bvl->bv_len;
+ segment++;
+ BUG_ON(segment >= bio->bi_vcnt);
+ src_bvl = bio_iovec_idx(bio, segment);
+ }
+
+ if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
+ pages[f] = src_bvl->bv_page;
+ offsets[f] = src_bvl->bv_offset + src_offs;
+ } else {
+ pkt_copy_bio_data(bio, segment, src_offs,
+ pages[f], offsets[f]);
+ }
+ src_offs += CD_FRAMESIZE;
+ frames_write++;
+ }
+ }
+ pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+ spin_unlock(&pkt->lock);
+
+ VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
+ frames_write, (unsigned long long)pkt->sector);
+ BUG_ON(frames_write != pkt->write_size);
+
+ if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
+ pkt_make_local_copy(pkt, pages, offsets);
+ pkt->cache_valid = 1;
+ } else {
+ pkt->cache_valid = 0;
+ }
+
+ /* Start the write request */
+ bio_init(pkt->w_bio);
+ pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
+ pkt->w_bio->bi_sector = pkt->sector;
+ pkt->w_bio->bi_bdev = pd->bdev;
+ pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+ pkt->w_bio->bi_private = pkt;
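+	/*
+	 * Add all frames to the write bio, merging two adjacent frames into
+	 * a single bio_vec when they are contiguous within the same page.
+	 */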
+ for (f = 0; f < pkt->frames; f++) {
+ if ((f + 1 < pkt->frames) && (pages[f + 1] == pages[f]) &&
+ (offsets[f + 1] == offsets[f] + CD_FRAMESIZE)) {
+ if (!bio_add_page(pkt->w_bio, pages[f], CD_FRAMESIZE * 2, offsets[f]))
+ BUG();
+ f++;
+ } else {
+ if (!bio_add_page(pkt->w_bio, pages[f], CD_FRAMESIZE, offsets[f]))
+ BUG();
+ }
+ }
+ VPRINTK("pktcdvd: vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+ atomic_set(&pkt->io_wait, 1);
+ pkt->w_bio->bi_rw = WRITE;
+ pkt_queue_bio(pd, pkt->w_bio, 0);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
+{
+ struct bio *bio, *next;
+
+ if (!uptodate)
+ pkt->cache_valid = 0;
+
+ /* Finish all bios corresponding to this packet */
+ bio = pkt->orig_bios;
+ while (bio) {
+ next = bio->bi_next;
+ bio->bi_next = NULL;
+ bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO);
+ bio = next;
+ }
+ pkt->orig_bios = pkt->orig_bios_tail = NULL;
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int uptodate;
+
+ VPRINTK("run_state_machine: pkt %d\n", pkt->id);
+
+ for (;;) {
+ switch (pkt->state) {
+ case PACKET_WAITING_STATE:
+ if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+ return;
+
+ pkt->sleep_time = 0;
+ pkt_gather_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+ break;
+
+ case PACKET_READ_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (atomic_read(&pkt->io_errors) > 0) {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ } else {
+ pkt_start_write(pd, pkt);
+ }
+ break;
+
+ case PACKET_WRITE_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) {
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ } else {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ }
+ break;
+
+ case PACKET_RECOVERY_STATE:
+ if (pkt_start_recovery(pkt)) {
+ pkt_start_write(pd, pkt);
+ } else {
+ VPRINTK("No recovery possible\n");
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ }
+ break;
+
+ case PACKET_FINISHED_STATE:
+ uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags);
+ pkt_finish_packet(pkt, uptodate);
+ return;
+
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ VPRINTK("pkt_handle_packets\n");
+
+ /*
+ * Run state machine for active packets
+ */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0) {
+ atomic_set(&pkt->run_sm, 0);
+ pkt_run_state_machine(pd, pkt);
+ }
+ }
+
+ /*
+ * Move no longer active packets to the free list
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->state == PACKET_FINISHED_STATE) {
+ list_del(&pkt->list);
+ pkt_put_packet_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_IDLE_STATE);
+ atomic_set(&pd->scan_queue, 1);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+ struct packet_data *pkt;
+ int i;
+
+ for (i = 0; i < PACKET_NUM_STATES; i++)
+ states[i] = 0;
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ states[pkt->state]++;
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+ struct pktcdvd_device *pd = foobar;
+ struct packet_data *pkt;
+ long min_sleep_time, residue;
+
+ set_user_nice(current, -20);
+
+ for (;;) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * Wait until there is something to do
+ */
+ add_wait_queue(&pd->wqueue, &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* Check if we need to run pkt_handle_queue */
+ if (atomic_read(&pd->scan_queue) > 0)
+ goto work_to_do;
+
+ /* Check if we need to run the state machine for some packet */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0)
+ goto work_to_do;
+ }
+
+ /* Check if we need to process the iosched queues */
+ if (atomic_read(&pd->iosched.attention) != 0)
+ goto work_to_do;
+
+ /* Otherwise, go to sleep */
+ if (PACKET_DEBUG > 1) {
+ int states[PACKET_NUM_STATES];
+ pkt_count_states(pd, states);
+ VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3],
+ states[4], states[5]);
+ }
+
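+			/*
+			 * Sleep no longer than the shortest remaining
+			 * per-packet wait, so that WAITING packets get their
+			 * state machine run on time.
+			 */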
+ min_sleep_time = MAX_SCHEDULE_TIMEOUT;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
+ min_sleep_time = pkt->sleep_time;
+ }
+
+ generic_unplug_device(bdev_get_queue(pd->bdev));
+
+ VPRINTK("kcdrwd: sleeping\n");
+ residue = schedule_timeout(min_sleep_time);
+ VPRINTK("kcdrwd: wake up\n");
+
+ /* make swsusp happy with our thread */
+ if (current->flags & PF_FREEZE)
+ refrigerator(PF_FREEZE);
+
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (!pkt->sleep_time)
+ continue;
+ pkt->sleep_time -= min_sleep_time - residue;
+ if (pkt->sleep_time <= 0) {
+ pkt->sleep_time = 0;
+ atomic_inc(&pkt->run_sm);
+ }
+ }
+
+ if (signal_pending(current)) {
+ flush_signals(current);
+ }
+ if (kthread_should_stop())
+ break;
+ }
+work_to_do:
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&pd->wqueue, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ /*
+ * if pkt_handle_queue returns true, we can queue
+ * another request.
+ */
+ while (pkt_handle_queue(pd))
+ ;
+
+ /*
+ * Handle packet state machine
+ */
+ pkt_handle_packets(pd);
+
+ /*
+ * Handle iosched queues
+ */
+ pkt_iosched_process_queue(pd);
+ }
+
+ return 0;
+}
+
+static void pkt_print_settings(struct pktcdvd_device *pd)
+{
+ printk("pktcdvd: %s packets, ", pd->settings.fp ? "Fixed" : "Variable");
+ printk("%u blocks, ", pd->settings.size >> 2);
+ printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2');
+}
+
+static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc,
+ int page_code, int page_control)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+
+ cgc->cmd[0] = GPCMD_MODE_SENSE_10;
+ cgc->cmd[2] = page_code | (page_control << 6);
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_READ;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+ memset(cgc->buffer, 0, 2);
+ cgc->cmd[0] = GPCMD_MODE_SELECT_10;
+ cgc->cmd[1] = 0x10; /* PF */
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_WRITE;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
+{
+ struct packet_command cgc;
+ int ret;
+
+ /* set up command and get the disc info */
+ init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+ cgc.cmd[8] = cgc.buflen = 2;
+ cgc.quiet = 1;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ return ret;
+
+ /* not all drives have the same disc_info length, so requeue
+ * packet with the length the drive tells us it can supply
+ */
+ cgc.buflen = be16_to_cpu(di->disc_information_length) +
+ sizeof(di->disc_information_length);
+
+ if (cgc.buflen > sizeof(disc_information))
+ cgc.buflen = sizeof(disc_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
+{
+ struct packet_command cgc;
+ int ret;
+
+ init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+ cgc.cmd[1] = type & 3;
+ cgc.cmd[4] = (track & 0xff00) >> 8;
+ cgc.cmd[5] = track & 0xff;
+ cgc.cmd[8] = 8;
+ cgc.quiet = 1;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ return ret;
+
+ cgc.buflen = be16_to_cpu(ti->track_information_length) +
+ sizeof(ti->track_information_length);
+
+ if (cgc.buflen > sizeof(track_information))
+ cgc.buflen = sizeof(track_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_last_written(struct pktcdvd_device *pd, long *last_written)
+{
+ disc_information di;
+ track_information ti;
+ __u32 last_track;
+ int ret = -1;
+
+ if ((ret = pkt_get_disc_info(pd, &di)))
+ return ret;
+
+ last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+ if ((ret = pkt_get_track_info(pd, last_track, 1, &ti)))
+ return ret;
+
+ /* if this track is blank, try the previous. */
+ if (ti.blank) {
+ last_track--;
+ if ((ret = pkt_get_track_info(pd, last_track, 1, &ti)))
+ return ret;
+ }
+
+ /* if last recorded field is valid, return it. */
+ if (ti.lra_v) {
+ *last_written = be32_to_cpu(ti.last_rec_address);
+ } else {
+ /* make it up instead */
+ *last_written = be32_to_cpu(ti.track_start) +
+ be32_to_cpu(ti.track_size);
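+		/* the extra 7 blocks presumably account for the packet
+		 * run-in/link blocks
+		 */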
+ if (ti.free_blocks)
+ *last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+ }
+ return 0;
+}
+
+/*
+ * write mode select package based on pd->settings
+ */
+static int pkt_set_write_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ write_param_page *wp;
+ char buffer[128];
+ int ret, size;
+
+ /* doesn't apply to DVD+RW */
+ if (pd->mmc3_profile == 0x1a)
+ return 0;
+
+ memset(buffer, 0, sizeof(buffer));
+ init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
+ cgc.sense = &sense;
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
+ pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+ if (size > sizeof(buffer))
+ size = sizeof(buffer);
+
+ /*
+ * now get it all
+ */
+ init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
+ cgc.sense = &sense;
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ /*
+ * write page is offset header + block descriptor length
+ */
+ wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
+
+ wp->fp = pd->settings.fp;
+ wp->track_mode = pd->settings.track_mode;
+ wp->write_type = pd->settings.write_type;
+ wp->data_block_type = pd->settings.block_mode;
+
+ wp->multi_session = 0;
+
+#ifdef PACKET_USE_LS
+ wp->link_size = 7;
+ wp->ls_v = 1;
+#endif
+
+ if (wp->data_block_type == PACKET_BLOCK_MODE1) {
+ wp->session_format = 0;
+ wp->subhdr2 = 0x20;
+ } else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
+ wp->session_format = 0x20;
+ wp->subhdr2 = 8;
+#if 0
+ wp->mcn[0] = 0x80;
+ memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
+#endif
+ } else {
+ /*
+ * paranoia
+ */
+ printk("pktcdvd: write mode wrong %d\n", wp->data_block_type);
+ return 1;
+ }
+ wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
+
+ cgc.buflen = cgc.cmd[8] = size;
+ if ((ret = pkt_mode_select(pd, &cgc))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ pkt_print_settings(pd);
+ return 0;
+}
+
+/*
+ * 0 -- we can write to this track, 1 -- we can't
+ */
+static int pkt_good_track(track_information *ti)
+{
+ /*
+ * only good for CD-RW at the moment, not DVD-RW
+ */
+
+ /*
+ * FIXME: only for FP
+ */
+ if (ti->fp == 0)
+ return 0;
+
+ /*
+ * "good" settings as per Mt Fuji.
+ */
+ if (ti->rt == 0 && ti->blank == 0 && ti->packet == 1)
+ return 0;
+
+ if (ti->rt == 0 && ti->blank == 1 && ti->packet == 1)
+ return 0;
+
+ if (ti->rt == 1 && ti->blank == 0 && ti->packet == 1)
+ return 0;
+
+ printk("pktcdvd: bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+ return 1;
+}
+
+/*
+ * 0 -- we can write to this disc, 1 -- we can't
+ */
+static int pkt_good_disc(struct pktcdvd_device *pd, disc_information *di)
+{
+ switch (pd->mmc3_profile) {
+ case 0x0a: /* CD-RW */
+ case 0xffff: /* MMC3 not supported */
+ break;
+ case 0x1a: /* DVD+RW */
+ case 0x13: /* DVD-RW */
+ return 0;
+ default:
+ printk("pktcdvd: Wrong disc profile (%x)\n", pd->mmc3_profile);
+ return 1;
+ }
+
+ /*
+ * for disc type 0xff we should probably reserve a new track.
+ * but i'm not sure, should we leave this to user apps? probably.
+ */
+ if (di->disc_type == 0xff) {
+ printk("pktcdvd: Unknown disc. No track?\n");
+ return 1;
+ }
+
+ if (di->disc_type != 0x20 && di->disc_type != 0) {
+ printk("pktcdvd: Wrong disc type (%x)\n", di->disc_type);
+ return 1;
+ }
+
+ if (di->erasable == 0) {
+ printk("pktcdvd: Disc not erasable\n");
+ return 1;
+ }
+
+ if (di->border_status == PACKET_SESSION_RESERVED) {
+ printk("pktcdvd: Can't write to last track (reserved)\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int pkt_probe_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ unsigned char buf[12];
+ disc_information di;
+ track_information ti;
+ int ret, track;
+
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
+ cgc.cmd[8] = 8;
+ ret = pkt_generic_packet(pd, &cgc);
+ pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+
+ memset(&di, 0, sizeof(disc_information));
+ memset(&ti, 0, sizeof(track_information));
+
+ if ((ret = pkt_get_disc_info(pd, &di))) {
+ printk("failed get_disc\n");
+ return ret;
+ }
+
+ if (pkt_good_disc(pd, &di))
+ return -ENXIO;
+
+ switch (pd->mmc3_profile) {
+ case 0x1a: /* DVD+RW */
+ printk("pktcdvd: inserted media is DVD+RW\n");
+ break;
+ case 0x13: /* DVD-RW */
+ printk("pktcdvd: inserted media is DVD-RW\n");
+ break;
+ default:
+ printk("pktcdvd: inserted media is CD-R%s\n", di.erasable ? "W" : "");
+ break;
+ }
+ pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
+
+ track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
+ if ((ret = pkt_get_track_info(pd, track, 1, &ti))) {
+ printk("pktcdvd: failed get_track\n");
+ return ret;
+ }
+
+ if (pkt_good_track(&ti)) {
+ printk("pktcdvd: can't write to this track\n");
+ return -ENXIO;
+ }
+
+ /*
+ * we keep packet size in 512 byte units, makes it easier to
+ * deal with request calculations.
+ */
+ pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
+ if (pd->settings.size == 0) {
+ printk("pktcdvd: detected zero packet size!\n");
+ pd->settings.size = 128;
+ }
+ pd->settings.fp = ti.fp;
+ pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
+
+ if (ti.nwa_v) {
+ pd->nwa = be32_to_cpu(ti.next_writable);
+ set_bit(PACKET_NWA_VALID, &pd->flags);
+ }
+
+ /*
+ * in theory we could use lra on -RW media as well and just zero
+ * blocks that haven't been written yet, but in practice that
+ * is just a no-go. we'll use that for -R, naturally.
+ */
+ if (ti.lra_v) {
+ pd->lra = be32_to_cpu(ti.last_rec_address);
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ } else {
+ pd->lra = 0xffffffff;
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ }
+
+ /*
+ * fine for now
+ */
+ pd->settings.link_loss = 7;
+ pd->settings.write_type = 0; /* packet */
+ pd->settings.track_mode = ti.track_mode;
+
+ /*
+ * mode1 or mode2 disc
+ */
+ switch (ti.data_mode) {
+ case PACKET_MODE1:
+ pd->settings.block_mode = PACKET_BLOCK_MODE1;
+ break;
+ case PACKET_MODE2:
+ pd->settings.block_mode = PACKET_BLOCK_MODE2;
+ break;
+ default:
+ printk("pktcdvd: unknown data mode\n");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * enable/disable write caching on drive
+ */
+static int pkt_write_caching(struct pktcdvd_device *pd, int set)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[64];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.buflen = pd->mode_offset + 12;
+
+ /*
+ * caching mode page might not be there, so quiet this command
+ */
+ cgc.quiet = 1;
+
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0)))
+ return ret;
+
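+	/*
+	 * Byte 2, bit 2 of the caching mode page is the WCE (write cache
+	 * enable) bit; it sits at mode_offset + 10, past the mode sense
+	 * header and block descriptors.
+	 */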
+ buf[pd->mode_offset + 10] |= (!!set << 2);
+
+ cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+ ret = pkt_mode_select(pd, &cgc);
+ if (ret) {
+ printk("pktcdvd: write caching control failed\n");
+ pkt_dump_sense(&cgc);
+ } else if (!ret && set)
+ printk("pktcdvd: enabled write caching on %s\n", pd->name);
+ return ret;
+}
+
+static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+ cgc.cmd[4] = lockflag ? 1 : 0;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * Returns drive maximum write speed
+ */
+static int pkt_get_max_speed(struct pktcdvd_device *pd, unsigned *write_speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[256+18];
+ unsigned char *cap_buf;
+ int ret, offset;
+
+ memset(buf, 0, sizeof(buf));
+ cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
+ cgc.sense = &sense;
+
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
+ sizeof(struct mode_page_header);
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+ }
+
+ offset = 20; /* Obsoleted field, used by older drives */
+ if (cap_buf[1] >= 28)
+ offset = 28; /* Current write speed selected */
+ if (cap_buf[1] >= 30) {
+ /* If the drive reports at least one "Logical Unit Write
+ * Speed Performance Descriptor Block", use the information
+ * in the first block. (contains the highest speed)
+ */
+ int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+ if (num_spdb > 0)
+ offset = 34;
+ }
+
+ *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+ return 0;
+}
+
+/* These tables from cdrecord - I don't have orange book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (-10x) */
+static char hs_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[64];
+ unsigned int size, st, sp;
+ int ret;
+
+ init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4; /* READ ATIP */
+ cgc.cmd[8] = 2;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+ size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+ if (size > sizeof(buf))
+ size = sizeof(buf);
+
+ init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4;
+ cgc.cmd[8] = size;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ if (!(buf[6] & 0x40)) {
+ printk("pktcdvd: Disc type is not CD-RW\n");
+ return 1;
+ }
+ if (!(buf[6] & 0x4)) {
+ printk("pktcdvd: A1 values on media are not valid, maybe not CDRW?\n");
+ return 1;
+ }
+
+ st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+ sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+ /* Info from cdrecord */
+ switch (st) {
+ case 0: /* standard speed */
+ *speed = clv_to_speed[sp];
+ break;
+ case 1: /* high speed */
+ *speed = hs_clv_to_speed[sp];
+ break;
+ case 2: /* ultra high speed */
+ *speed = us_clv_to_speed[sp];
+ break;
+ default:
+ printk("pktcdvd: Unknown disc sub-type %d\n",st);
+ return 1;
+ }
+ if (*speed) {
+ printk("pktcdvd: Max. media speed: %d\n",*speed);
+ return 0;
+ } else {
+ printk("pktcdvd: Unknown speed %d for sub-type %d\n",sp,st);
+ return 1;
+ }
+}
+
+static int pkt_perform_opc(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ int ret;
+
+ VPRINTK("pktcdvd: Performing OPC\n");
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sense = &sense;
+ cgc.timeout = 60*HZ;
+ cgc.cmd[0] = GPCMD_SEND_OPC;
+ cgc.cmd[1] = 1;
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ pkt_dump_sense(&cgc);
+ return ret;
+}
+
+static int pkt_open_write(struct pktcdvd_device *pd)
+{
+ int ret;
+ unsigned int write_speed, media_write_speed, read_speed;
+
+ if ((ret = pkt_probe_settings(pd))) {
+ DPRINTK("pktcdvd: %s failed probe\n", pd->name);
+ return -EIO;
+ }
+
+ if ((ret = pkt_set_write_settings(pd))) {
+ DPRINTK("pktcdvd: %s failed saving write settings\n", pd->name);
+ return -EIO;
+ }
+
+ pkt_write_caching(pd, USE_WCACHING);
+
+ if ((ret = pkt_get_max_speed(pd, &write_speed)))
+ write_speed = 16 * 177;
+ switch (pd->mmc3_profile) {
+ case 0x13: /* DVD-RW */
+ case 0x1a: /* DVD+RW */
+ DPRINTK("pktcdvd: write speed %ukB/s\n", write_speed);
+ break;
+ default:
+ if ((ret = pkt_media_speed(pd, &media_write_speed)))
+ media_write_speed = 16;
+ write_speed = min(write_speed, media_write_speed * 177);
+ DPRINTK("pktcdvd: write speed %ux\n", write_speed / 176);
+ break;
+ }
+ read_speed = write_speed;
+
+ if ((ret = pkt_set_speed(pd, write_speed, read_speed))) {
+ DPRINTK("pktcdvd: %s couldn't set write speed\n", pd->name);
+ return -EIO;
+ }
+ pd->write_speed = write_speed;
+ pd->read_speed = read_speed;
+
+ if ((ret = pkt_perform_opc(pd))) {
+ DPRINTK("pktcdvd: %s Optimum Power Calibration failed\n", pd->name);
+ }
+
+ return 0;
+}
+
+/*
+ * called at open time.
+ */
+static int pkt_open_dev(struct pktcdvd_device *pd, int write)
+{
+ int ret;
+ long lba;
+ request_queue_t *q;
+
+ /*
+ * We need to re-open the cdrom device without O_NONBLOCK to be able
+ * to read/write from/to it. It is already opened in O_NONBLOCK mode
+ * so bdget() can't fail.
+ */
+ bdget(pd->bdev->bd_dev);
+ if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY)))
+ goto out;
+
+ if ((ret = pkt_get_last_written(pd, &lba))) {
+ printk("pktcdvd: pkt_get_last_written failed\n");
+ goto out_putdev;
+ }
+
+ set_capacity(pd->disk, lba << 2);
+ set_capacity(pd->bdev->bd_disk, lba << 2);
+ bd_set_size(pd->bdev, (loff_t)lba << 11);
+
+ q = bdev_get_queue(pd->bdev);
+ if (write) {
+ if ((ret = pkt_open_write(pd)))
+ goto out_putdev;
+ /*
+ * Some CDRW drives can not handle writes larger than one packet,
+ * even if the size is a multiple of the packet size.
+ */
+ spin_lock_irq(q->queue_lock);
+ blk_queue_max_sectors(q, pd->settings.size);
+ spin_unlock_irq(q->queue_lock);
+ set_bit(PACKET_WRITABLE, &pd->flags);
+ } else {
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ clear_bit(PACKET_WRITABLE, &pd->flags);
+ }
+
+ if ((ret = pkt_set_segment_merging(pd, q)))
+ goto out_putdev;
+
+ if (write)
+ printk("pktcdvd: %lukB available on disc\n", lba << 1);
+
+ return 0;
+
+out_putdev:
+ blkdev_put(pd->bdev);
+out:
+ return ret;
+}
+
+/*
+ * called when the device is closed. makes sure that the device flushes
+ * the internal cache before we close.
+ */
+static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
+{
+ if (flush && pkt_flush_cache(pd))
+ DPRINTK("pktcdvd: %s not flushing cache\n", pd->name);
+
+ pkt_lock_door(pd, 0);
+
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ blkdev_put(pd->bdev);
+}
+
+static struct pktcdvd_device *pkt_find_dev_from_minor(int dev_minor)
+{
+ if (dev_minor >= MAX_WRITERS)
+ return NULL;
+ return pkt_devs[dev_minor];
+}
+
+static int pkt_open(struct inode *inode, struct file *file)
+{
+ struct pktcdvd_device *pd = NULL;
+ int ret;
+
+ VPRINTK("pktcdvd: entering open\n");
+
+ down(&ctl_mutex);
+ pd = pkt_find_dev_from_minor(iminor(inode));
+ if (!pd) {
+ ret = -ENODEV;
+ goto out;
+ }
+ BUG_ON(pd->refcnt < 0);
+
+ pd->refcnt++;
+ if (pd->refcnt == 1) {
+ if (pkt_open_dev(pd, file->f_mode & FMODE_WRITE)) {
+ ret = -EIO;
+ goto out_dec;
+ }
+ /*
+ * needed here as well, since ext2 (among others) may change
+ * the blocksize at mount time
+ */
+ set_blocksize(inode->i_bdev, CD_FRAMESIZE);
+ }
+
+ up(&ctl_mutex);
+ return 0;
+
+out_dec:
+ pd->refcnt--;
+out:
+ VPRINTK("pktcdvd: failed open (%d)\n", ret);
+ up(&ctl_mutex);
+ return ret;
+}
+
+static int pkt_close(struct inode *inode, struct file *file)
+{
+ struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data;
+ int ret = 0;
+
+ down(&ctl_mutex);
+ pd->refcnt--;
+ BUG_ON(pd->refcnt < 0);
+ if (pd->refcnt == 0) {
+ int flush = test_bit(PACKET_WRITABLE, &pd->flags);
+ pkt_release_dev(pd, flush);
+ }
+ up(&ctl_mutex);
+ return ret;
+}
+
+
+static void *psd_pool_alloc(int gfp_mask, void *data)
+{
+ return kmalloc(sizeof(struct packet_stacked_data), gfp_mask);
+}
+
+static void psd_pool_free(void *ptr, void *data)
+{
+ kfree(ptr);
+}
+
+static int pkt_end_io_read_cloned(struct bio *bio, unsigned int bytes_done, int err)
+{
+ struct packet_stacked_data *psd = bio->bi_private;
+ struct pktcdvd_device *pd = psd->pd;
+
+ if (bio->bi_size)
+ return 1;
+
+ bio_put(bio);
+ bio_endio(psd->bio, psd->bio->bi_size, err);
+ mempool_free(psd, psd_pool);
+ pkt_bio_finished(pd);
+ return 0;
+}
+
+static int pkt_make_request(request_queue_t *q, struct bio *bio)
+{
+ struct pktcdvd_device *pd;
+ char b[BDEVNAME_SIZE];
+ sector_t zone;
+ struct packet_data *pkt;
+ int was_empty, blocked_bio;
+ struct pkt_rb_node *node;
+
+ pd = q->queuedata;
+ if (!pd) {
+ printk("pktcdvd: %s incorrect request queue\n", bdevname(bio->bi_bdev, b));
+ goto end_io;
+ }
+
+ /*
+ * Clone READ bios so we can have our own bi_end_io callback.
+ */
+ if (bio_data_dir(bio) == READ) {
+ struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+
+ psd->pd = pd;
+ psd->bio = bio;
+ cloned_bio->bi_bdev = pd->bdev;
+ cloned_bio->bi_private = psd;
+ cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+ pd->stats.secs_r += bio->bi_size >> 9;
+ pkt_queue_bio(pd, cloned_bio, 1);
+ return 0;
+ }
+
+ if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+ printk("pktcdvd: WRITE for ro device %s (%llu)\n",
+ pd->name, (unsigned long long)bio->bi_sector);
+ goto end_io;
+ }
+
+ if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
+ printk("pktcdvd: wrong bio size\n");
+ goto end_io;
+ }
+
+ blk_queue_bounce(q, &bio);
+
+ zone = ZONE(bio->bi_sector, pd);
+ VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
+ (unsigned long long)bio->bi_sector,
+ (unsigned long long)(bio->bi_sector + bio_sectors(bio)));
+
+ /* Check if we have to split the bio */
+ {
+ struct bio_pair *bp;
+ sector_t last_zone;
+ int first_sectors;
+
+ last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
+ if (last_zone != zone) {
+ BUG_ON(last_zone != zone + pd->settings.size);
+ first_sectors = last_zone - bio->bi_sector;
+ bp = bio_split(bio, bio_split_pool, first_sectors);
+ BUG_ON(!bp);
+ pkt_make_request(q, &bp->bio1);
+ pkt_make_request(q, &bp->bio2);
+ bio_pair_release(bp);
+ return 0;
+ }
+ }
+
+ /*
+ * If we find a matching packet in state WAITING or READ_WAIT, we can
+ * just append this bio to that packet.
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ blocked_bio = 0;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sector == zone) {
+ spin_lock(&pkt->lock);
+ if ((pkt->state == PACKET_WAITING_STATE) ||
+ (pkt->state == PACKET_READ_WAIT_STATE)) {
+ pkt_add_list_last(bio, &pkt->orig_bios,
+ &pkt->orig_bios_tail);
+ pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ if ((pkt->write_size >= pkt->frames) &&
+ (pkt->state == PACKET_WAITING_STATE)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ spin_unlock(&pkt->lock);
+ spin_unlock(&pd->cdrw.active_list_lock);
+ return 0;
+ } else {
+ blocked_bio = 1;
+ }
+ spin_unlock(&pkt->lock);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ /*
+ * No matching packet found. Store the bio in the work queue.
+ */
+ node = mempool_alloc(pd->rb_pool, GFP_NOIO);
+ BUG_ON(!node);
+ node->bio = bio;
+ spin_lock(&pd->lock);
+ BUG_ON(pd->bio_queue_size < 0);
+ was_empty = (pd->bio_queue_size == 0);
+ pkt_rbtree_insert(pd, node);
+ spin_unlock(&pd->lock);
+
+ /*
+ * Wake up the worker thread.
+ */
+ atomic_set(&pd->scan_queue, 1);
+ if (was_empty) {
+ /* This wake_up is required for correct operation */
+ wake_up(&pd->wqueue);
+ } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
+ /*
+ * This wake up is not required for correct operation,
+ * but improves performance in some cases.
+ */
+ wake_up(&pd->wqueue);
+ }
+ return 0;
+end_io:
+ bio_io_error(bio, bio->bi_size);
+ return 0;
+}
+
+
+
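+/*
+ * merge_bvec callback: return how many bytes may still be added to the bio
+ * without crossing the current packet (zone) boundary.
+ */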
+static int pkt_merge_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *bvec)
+{
+ struct pktcdvd_device *pd = q->queuedata;
+ sector_t zone = ZONE(bio->bi_sector, pd);
+ int used = ((bio->bi_sector - zone) << 9) + bio->bi_size;
+ int remaining = (pd->settings.size << 9) - used;
+ int remaining2;
+
+ /*
+ * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
+ * boundary, pkt_make_request() will split the bio.
+ */
+ remaining2 = PAGE_SIZE - bio->bi_size;
+ remaining = max(remaining, remaining2);
+
+ BUG_ON(remaining < 0);
+ return remaining;
+}
+
+static void pkt_init_queue(struct pktcdvd_device *pd)
+{
+ request_queue_t *q = pd->disk->queue;
+
+ blk_queue_make_request(q, pkt_make_request);
+ blk_queue_hardsect_size(q, CD_FRAMESIZE);
+ blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
+ blk_queue_merge_bvec(q, pkt_merge_bvec);
+ q->queuedata = pd;
+}
+
+static int pkt_seq_show(struct seq_file *m, void *p)
+{
+ struct pktcdvd_device *pd = m->private;
+ char *msg;
+ char bdev_buf[BDEVNAME_SIZE];
+ int states[PACKET_NUM_STATES];
+
+ seq_printf(m, "Writer %s mapped to %s:\n", pd->name,
+ bdevname(pd->bdev, bdev_buf));
+
+ seq_printf(m, "\nSettings:\n");
+ seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+ if (pd->settings.write_type == 0)
+ msg = "Packet";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+ seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+ seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+ seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+ if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+ msg = "Mode 1";
+ else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+ msg = "Mode 2";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+ seq_printf(m, "\nStatistics:\n");
+ seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+ seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+ seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+ seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+ seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+ seq_printf(m, "\nMisc:\n");
+ seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+ seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+ seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+ seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+ seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+ seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+ seq_printf(m, "\nQueue state:\n");
+ seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+ seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+ seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
+
+ pkt_count_states(pd, states);
+ seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3], states[4], states[5]);
+
+ return 0;
+}
+
+static int pkt_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pkt_seq_show, PDE(inode)->data);
+}
+
+static struct file_operations pkt_proc_fops = {
+ .open = pkt_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
+{
+ int i;
+ int ret = 0;
+ char b[BDEVNAME_SIZE];
+ struct proc_dir_entry *proc;
+ struct block_device *bdev;
+
+ if (pd->pkt_dev == dev) {
+ printk("pktcdvd: Recursive setup not allowed\n");
+ return -EBUSY;
+ }
+ for (i = 0; i < MAX_WRITERS; i++) {
+ struct pktcdvd_device *pd2 = pkt_devs[i];
+ if (!pd2)
+ continue;
+ if (pd2->bdev->bd_dev == dev) {
+ printk("pktcdvd: %s already setup\n", bdevname(pd2->bdev, b));
+ return -EBUSY;
+ }
+ if (pd2->pkt_dev == dev) {
+ printk("pktcdvd: Can't chain pktcdvd devices\n");
+ return -EBUSY;
+ }
+ }
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return -ENOMEM;
+ ret = blkdev_get(bdev, FMODE_READ, O_RDONLY | O_NONBLOCK);
+ if (ret)
+ return ret;
+
+ /* This is safe, since we have a reference from open(). */
+ __module_get(THIS_MODULE);
+
+ if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+ printk("pktcdvd: not enough memory for buffers\n");
+ ret = -ENOMEM;
+ goto out_mem;
+ }
+
+ pd->bdev = bdev;
+ set_blocksize(bdev, CD_FRAMESIZE);
+
+ pkt_init_queue(pd);
+
+ atomic_set(&pd->cdrw.pending_bios, 0);
+ pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+ if (IS_ERR(pd->cdrw.thread)) {
+ printk("pktcdvd: can't start kernel thread\n");
+ ret = -ENOMEM;
+ goto out_thread;
+ }
+
+ proc = create_proc_entry(pd->name, 0, pkt_proc);
+ if (proc) {
+ proc->data = pd;
+ proc->proc_fops = &pkt_proc_fops;
+ }
+ DPRINTK("pktcdvd: writer %s mapped to %s\n", pd->name, bdevname(bdev, b));
+ return 0;
+
+out_thread:
+ pkt_shrink_pktlist(pd);
+out_mem:
+ blkdev_put(bdev);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data;
+
+ VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode));
+ BUG_ON(!pd);
+
+ switch (cmd) {
+ /*
+ * forward selected CDROM ioctls to CD-ROM, for UDF
+ */
+ case CDROMMULTISESSION:
+ case CDROMREADTOCENTRY:
+ case CDROM_LAST_WRITTEN:
+ case CDROM_SEND_PACKET:
+ case SCSI_IOCTL_SEND_COMMAND:
+ return ioctl_by_bdev(pd->bdev, cmd, arg);
+
+ case CDROMEJECT:
+ /*
+ * The door gets locked when the device is opened, so we
+ * have to unlock it or else the eject command fails.
+ */
+ pkt_lock_door(pd, 0);
+ return ioctl_by_bdev(pd->bdev, cmd, arg);
+
+ default:
+ printk("pktcdvd: Unknown ioctl for %s (%x)\n", pd->name, cmd);
+ return -ENOTTY;
+ }
+
+ return 0;
+}
+
+static int pkt_media_changed(struct gendisk *disk)
+{
+ struct pktcdvd_device *pd = disk->private_data;
+ struct gendisk *attached_disk;
+
+ if (!pd)
+ return 0;
+ if (!pd->bdev)
+ return 0;
+ attached_disk = pd->bdev->bd_disk;
+ if (!attached_disk)
+ return 0;
+ return attached_disk->fops->media_changed(attached_disk);
+}
+
+static struct block_device_operations pktcdvd_ops = {
+ .owner = THIS_MODULE,
+ .open = pkt_open,
+ .release = pkt_close,
+ .ioctl = pkt_ioctl,
+ .media_changed = pkt_media_changed,
+};
+
+/*
+ * Set up mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd)
+{
+ int idx;
+ int ret = -ENOMEM;
+ struct pktcdvd_device *pd;
+ struct gendisk *disk;
+ dev_t dev = new_decode_dev(ctrl_cmd->dev);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++)
+ if (!pkt_devs[idx])
+ break;
+ if (idx == MAX_WRITERS) {
+ printk("pktcdvd: max %d writers supported\n", MAX_WRITERS);
+ return -EBUSY;
+ }
+
+ pd = kmalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
+ if (!pd)
+ return ret;
+ memset(pd, 0, sizeof(struct pktcdvd_device));
+
+ pd->rb_pool = mempool_create(PKT_RB_POOL_SIZE, pkt_rb_alloc, pkt_rb_free, NULL);
+ if (!pd->rb_pool)
+ goto out_mem;
+
+ disk = alloc_disk(1);
+ if (!disk)
+ goto out_mem;
+ pd->disk = disk;
+
+ spin_lock_init(&pd->lock);
+ spin_lock_init(&pd->iosched.lock);
+ sprintf(pd->name, "pktcdvd%d", idx);
+ init_waitqueue_head(&pd->wqueue);
+ pd->bio_queue = RB_ROOT;
+
+ disk->major = pkt_major;
+ disk->first_minor = idx;
+ disk->fops = &pktcdvd_ops;
+ disk->flags = GENHD_FL_REMOVABLE;
+ sprintf(disk->disk_name, "pktcdvd%d", idx);
+ disk->private_data = pd;
+ disk->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!disk->queue)
+ goto out_mem2;
+
+ pd->pkt_dev = MKDEV(disk->major, disk->first_minor);
+ ret = pkt_new_dev(pd, dev);
+ if (ret)
+ goto out_new_dev;
+
+ add_disk(disk);
+ pkt_devs[idx] = pd;
+ ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+ return 0;
+
+out_new_dev:
+ blk_put_queue(disk->queue);
+out_mem2:
+ put_disk(disk);
+out_mem:
+ if (pd->rb_pool)
+ mempool_destroy(pd->rb_pool);
+ kfree(pd);
+ return ret;
+}
+
+/*
+ * Tear down mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_remove_dev(struct pkt_ctrl_command *ctrl_cmd)
+{
+ struct pktcdvd_device *pd;
+ int idx;
+ dev_t pkt_dev = new_decode_dev(ctrl_cmd->pkt_dev);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++) {
+ pd = pkt_devs[idx];
+ if (pd && (pd->pkt_dev == pkt_dev))
+ break;
+ }
+ if (idx == MAX_WRITERS) {
+ DPRINTK("pktcdvd: dev not setup\n");
+ return -ENXIO;
+ }
+
+ if (pd->refcnt > 0)
+ return -EBUSY;
+
+ if (!IS_ERR(pd->cdrw.thread))
+ kthread_stop(pd->cdrw.thread);
+
+ blkdev_put(pd->bdev);
+
+ pkt_shrink_pktlist(pd);
+
+ remove_proc_entry(pd->name, pkt_proc);
+ DPRINTK("pktcdvd: writer %s unmapped\n", pd->name);
+
+ del_gendisk(pd->disk);
+ blk_put_queue(pd->disk->queue);
+ put_disk(pd->disk);
+
+ pkt_devs[idx] = NULL;
+ mempool_destroy(pd->rb_pool);
+ kfree(pd);
+
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
+{
+ struct pktcdvd_device *pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
+ if (pd) {
+ ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+ ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+ } else {
+ ctrl_cmd->dev = 0;
+ ctrl_cmd->pkt_dev = 0;
+ }
+ ctrl_cmd->num_devices = MAX_WRITERS;
+}
+
+static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct pkt_ctrl_command ctrl_cmd;
+ int ret = 0;
+
+ if (cmd != PACKET_CTRL_CMD)
+ return -ENOTTY;
+
+ if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+
+ switch (ctrl_cmd.command) {
+ case PKT_CTRL_CMD_SETUP:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ down(&ctl_mutex);
+ ret = pkt_setup_dev(&ctrl_cmd);
+ up(&ctl_mutex);
+ break;
+ case PKT_CTRL_CMD_TEARDOWN:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ down(&ctl_mutex);
+ ret = pkt_remove_dev(&ctrl_cmd);
+ up(&ctl_mutex);
+ break;
+ case PKT_CTRL_CMD_STATUS:
+ down(&ctl_mutex);
+ pkt_get_status(&ctrl_cmd);
+ up(&ctl_mutex);
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+ return ret;
+}
+
+
+static struct file_operations pkt_ctl_fops = {
+ .ioctl = pkt_ctl_ioctl,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice pkt_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "pktcdvd",
+ .devfs_name = "pktcdvd/control",
+ .fops = &pkt_ctl_fops
+};
+
+int pkt_init(void)
+{
+ int ret;
+
+ psd_pool = mempool_create(PSD_POOL_SIZE, psd_pool_alloc, psd_pool_free, NULL);
+ if (!psd_pool)
+ return -ENOMEM;
+
+ ret = register_blkdev(pkt_major, "pktcdvd");
+ if (ret < 0) {
+ printk("pktcdvd: Unable to register block device\n");
+ goto out2;
+ }
+ if (!pkt_major)
+ pkt_major = ret;
+
+ ret = misc_register(&pkt_misc);
+ if (ret) {
+ printk("pktcdvd: Unable to register misc device\n");
+ goto out;
+ }
+
+ init_MUTEX(&ctl_mutex);
+
+ pkt_proc = proc_mkdir("pktcdvd", proc_root_driver);
+
+ DPRINTK("pktcdvd: %s\n", VERSION_CODE);
+ return 0;
+
+out:
+ unregister_blkdev(pkt_major, "pktcdvd");
+out2:
+ mempool_destroy(psd_pool);
+ return ret;
+}
+
+void pkt_exit(void)
+{
+ remove_proc_entry("pktcdvd", proc_root_driver);
+ misc_deregister(&pkt_misc);
+ unregister_blkdev(pkt_major, "pktcdvd");
+ mempool_destroy(psd_pool);
+}
+
+MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
+MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
+MODULE_LICENSE("GPL");
+
+module_init(pkt_init);
+module_exit(pkt_exit);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index f605535d3f56..dd3be0a06219 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -25,6 +25,7 @@
* -- prune comments, they are too volumnous
* -- Exterminate P3 printks
* -- Resove XXX's
+ * -- Redo "benh's retries", perhaps have spin-up code to handle them. V:D=?
*/
#include <linux/kernel.h>
#include <linux/module.h>
@@ -62,9 +63,9 @@
/* command block wrapper */
struct bulk_cb_wrap {
- u32 Signature; /* contains 'USBC' */
+ __le32 Signature; /* contains 'USBC' */
u32 Tag; /* unique per command id */
- u32 DataTransferLength; /* size of data */
+ __le32 DataTransferLength; /* size of data */
u8 Flags; /* direction in bit 0 */
u8 Lun; /* LUN normally 0 */
	u8	Length;			/* of the CDB */
@@ -78,9 +79,9 @@ struct bulk_cb_wrap {
/* command status wrapper */
struct bulk_cs_wrap {
- u32 Signature; /* should = 'USBS' */
+ __le32 Signature; /* should = 'USBS' */
u32 Tag; /* same as original command */
- u32 Residue; /* amount not transferred */
+ __le32 Residue; /* amount not transferred */
u8 Status; /* see below */
};
@@ -157,7 +158,8 @@ struct ub_scsi_cmd {
struct ub_scsi_cmd *next;
int error; /* Return code - valid upon done */
- int act_len; /* Return size */
+ unsigned int act_len; /* Return size */
+ unsigned char key, asc, ascq; /* May be valid if error==-EIO */
int stat_count; /* Retries getting status. */
@@ -490,6 +492,18 @@ static void ub_id_put(int id)
*/
static void ub_cleanup(struct ub_dev *sc)
{
+
+ /*
+ * If we zero disk->private_data BEFORE put_disk, we have to check
+ * for NULL all over the place in open, release, check_media and
+ * revalidate, because the block level semaphore is well inside the
+ * put_disk. But we cannot zero after the call, because *disk is gone.
+ * The sd.c is blatantly racy in this area.
+ */
+ /* disk->private_data = NULL; */
+ put_disk(sc->disk);
+ sc->disk = NULL;
+
ub_id_put(sc->id);
kfree(sc);
}
@@ -661,9 +675,12 @@ static inline int ub_bd_rq_fn_1(request_queue_t *q)
/*
* build the command
+ *
+ * The call to blk_queue_hardsect_size() guarantees that request
+ * is aligned, but it is given in terms of 512 byte units, always.
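+	 *
+	 * For example, with 2048-byte hard sectors bshift is 2, so a
+	 * request covering 512-byte sectors 100..107 becomes device
+	 * blocks 25..26, while cmd->len below stays in bytes (8 * 512).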
*/
- block = rq->sector;
- nblks = rq->nr_sectors;
+ block = rq->sector >> sc->capacity.bshift;
+ nblks = rq->nr_sectors >> sc->capacity.bshift;
memset(cmd, 0, sizeof(struct ub_scsi_cmd));
cmd->cdb[0] = (ub_dir == UB_DIR_READ)? READ_10: WRITE_10;
@@ -678,7 +695,7 @@ static inline int ub_bd_rq_fn_1(request_queue_t *q)
cmd->dir = ub_dir;
cmd->state = UB_CMDST_INIT;
cmd->data = rq->buffer;
- cmd->len = nblks * 512;
+ cmd->len = rq->nr_sectors * 512;
cmd->done = ub_rw_cmd_done;
cmd->back = rq;
@@ -786,17 +803,16 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
- add_timer(&sc->work_timer);
-
if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
/* XXX Clear stalls */
printk("ub: cmd #%d start failed (%d)\n", cmd->tag, rc); /* P3 */
- del_timer(&sc->work_timer);
ub_complete(&sc->work_done);
return rc;
}
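+	/*
+	 * Arm the timeout only once the URB has been accepted, so a failed
+	 * submission has no timer to tear down; the completion side deletes
+	 * the timer again (see the del_timer() added to ub_scsi_action()).
+	 */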
+ sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+ add_timer(&sc->work_timer);
+
cmd->state = UB_CMDST_CMD;
ub_cmdtr_state(sc, cmd);
return 0;
@@ -836,6 +852,7 @@ static void ub_scsi_action(unsigned long _dev)
unsigned long flags;
spin_lock_irqsave(&sc->lock, flags);
+ del_timer(&sc->work_timer);
ub_scsi_dispatch(sc);
spin_unlock_irqrestore(&sc->lock, flags);
}
@@ -968,18 +985,17 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
- add_timer(&sc->work_timer);
-
if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
/* XXX Clear stalls */
printk("ub: data #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */
- del_timer(&sc->work_timer);
ub_complete(&sc->work_done);
ub_state_done(sc, cmd, rc);
return;
}
+ sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+ add_timer(&sc->work_timer);
+
cmd->state = UB_CMDST_DATA;
ub_cmdtr_state(sc, cmd);
@@ -1063,19 +1079,18 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
- add_timer(&sc->work_timer);
-
rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC);
if (rc != 0) {
/* XXX Clear stalls */
printk("%s: CSW #%d submit failed (%d)\n",
sc->name, cmd->tag, rc); /* P3 */
- del_timer(&sc->work_timer);
ub_complete(&sc->work_done);
ub_state_done(sc, cmd, rc);
return;
}
+
+ sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+ add_timer(&sc->work_timer);
return;
}
@@ -1132,16 +1147,8 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
(*cmd->done)(sc, cmd);
} else if (cmd->state == UB_CMDST_SENSE) {
- /*
- * We do not look at sense, because even if there was no sense,
- * we get into UB_CMDST_SENSE from a STALL or CSW FAIL only.
- * We request sense because we want to clear CHECK CONDITION
- * on devices with delusions of SCSI, and not because we
- * are curious in any way about the sense itself.
- */
- /* if ((cmd->top_sense[2] & 0x0F) == NO_SENSE) { foo } */
-
ub_state_done(sc, cmd, -EIO);
+
} else {
printk(KERN_WARNING "%s: "
"wrong command state %d on device %u\n",
@@ -1186,18 +1193,17 @@ static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
- add_timer(&sc->work_timer);
-
if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
/* XXX Clear stalls */
printk("ub: CSW #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */
- del_timer(&sc->work_timer);
ub_complete(&sc->work_done);
ub_state_done(sc, cmd, rc);
return;
}
+ sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+ add_timer(&sc->work_timer);
+
cmd->stat_count = 0;
cmd->state = UB_CMDST_STAT;
ub_cmdtr_state(sc, cmd);
@@ -1217,9 +1223,17 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
goto error;
}
+ /*
+ * ``If the allocation length is eighteen or greater, and a device
+	 * server returns less than eighteen bytes of data, the application
+ * client should assume that the bytes not transferred would have been
+ * zeroes had the device server returned those bytes.''
+ */
memset(&sc->top_sense, 0, UB_SENSE_SIZE);
+
scmd = &sc->top_rqs_cmd;
scmd->cdb[0] = REQUEST_SENSE;
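+	/* Byte 4 of the six-byte REQUEST SENSE CDB is the allocation length. */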
+ scmd->cdb[4] = UB_SENSE_SIZE;
scmd->cdb_len = 6;
scmd->dir = UB_DIR_READ;
scmd->state = UB_CMDST_INIT;
@@ -1271,14 +1285,13 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT;
- add_timer(&sc->work_timer);
-
if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
- del_timer(&sc->work_timer);
ub_complete(&sc->work_done);
return rc;
}
+
+ sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&sc->work_timer);
return 0;
}
@@ -1289,8 +1302,15 @@ static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd)
unsigned char *sense = scmd->data;
struct ub_scsi_cmd *cmd;
+ /*
+ * Ignoring scmd->act_len, because the buffer was pre-zeroed.
+ */
ub_cmdtr_sense(sc, scmd, sense);
+ /*
+ * Find the command which triggered the unit attention or a check,
+ * save the sense into it, and advance its state machine.
+ */
if ((cmd = ub_cmdq_peek(sc)) == NULL) {
printk(KERN_WARNING "%s: sense done while idle\n", sc->name);
return;
@@ -1308,6 +1328,10 @@ static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd)
return;
}
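+	/*
+	 * Fixed-format sense data: the low nibble of byte 2 is the sense
+	 * key, and bytes 12 and 13 carry the ASC/ASCQ pair.
+	 */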
+ cmd->key = sense[2] & 0x0F;
+ cmd->asc = sense[12];
+ cmd->ascq = sense[13];
+
ub_scsi_urb_compl(sc, cmd);
}
@@ -1407,7 +1431,15 @@ static int ub_bd_open(struct inode *inode, struct file *filp)
if (sc->removable || sc->readonly)
check_disk_change(inode->i_bdev);
- /* XXX sd.c and floppy.c bail on open if media is not present. */
+ /*
+ * The sd.c considers ->media_present and ->changed not equivalent,
+ * under some pretty murky conditions (a failure of READ CAPACITY).
+ * We may need it one day.
+ */
+ if (sc->removable && sc->changed && !(filp->f_flags & O_NDELAY)) {
+ rc = -ENOMEDIUM;
+ goto err_open;
+ }
if (sc->readonly && (filp->f_mode & FMODE_WRITE)) {
rc = -EROFS;
@@ -1492,8 +1524,11 @@ static int ub_bd_revalidate(struct gendisk *disk)
printk(KERN_INFO "%s: device %u capacity nsec %ld bsize %u\n",
sc->name, sc->dev->devnum, sc->capacity.nsec, sc->capacity.bsize);
+ /* XXX Support sector size switching like in sr.c */
+ blk_queue_hardsect_size(disk->queue, sc->capacity.bsize);
set_capacity(disk, sc->capacity.nsec);
// set_disk_ro(sdkp->disk, sc->readonly);
+
return 0;
}
@@ -1592,6 +1627,9 @@ static int ub_sync_tur(struct ub_dev *sc)
rc = cmd->error;
+ if (rc == -EIO && cmd->key != 0) /* Retries for benh's key */
+ rc = cmd->key;
+
err_submit:
kfree(cmd);
err_alloc:
@@ -1654,8 +1692,8 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_capacity *ret)
}
/* sd.c special-cases sector size of 0 to mean 512. Needed? Safe? */
- nsec = be32_to_cpu(*(u32 *)p) + 1;
- bsize = be32_to_cpu(*(u32 *)(p + 4));
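+	/*
+	 * READ CAPACITY data is big-endian on the wire; the __be32 casts
+	 * let sparse verify the byte-order conversion.
+	 */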
+ nsec = be32_to_cpu(*(__be32 *)p) + 1;
+ bsize = be32_to_cpu(*(__be32 *)(p + 4));
switch (bsize) {
case 512: shift = 0; break;
case 1024: shift = 1; break;
@@ -1725,28 +1763,22 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe)
sc->work_urb.error_count = 0;
sc->work_urb.status = 0;
- init_timer(&timer);
- timer.function = ub_probe_timeout;
- timer.data = (unsigned long) &compl;
- timer.expires = jiffies + UB_CTRL_TIMEOUT;
- add_timer(&timer);
-
if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) {
printk(KERN_WARNING
"%s: Unable to submit a probe clear (%d)\n", sc->name, rc);
- del_timer_sync(&timer);
return rc;
}
+ init_timer(&timer);
+ timer.function = ub_probe_timeout;
+ timer.data = (unsigned long) &compl;
+ timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&timer);
+
wait_for_completion(&compl);
del_timer_sync(&timer);
- /*
- * Most of the time, URB was done and dev set to NULL, and so
- * the unlink bounces out with ENODEV. We do not call usb_kill_urb
- * because we still think about a backport to 2.4.
- */
- usb_unlink_urb(&sc->work_urb);
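+	/*
+	 * Unlike usb_unlink_urb(), usb_kill_urb() waits for the URB to be
+	 * given back, so the toggle reset below cannot race a live transfer.
+	 */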
+ usb_kill_urb(&sc->work_urb);
/* reset the endpoint toggle */
usb_settoggle(sc->dev, endp, usb_pipeout(sc->last_pipe), 0);
@@ -1813,6 +1845,7 @@ static int ub_probe(struct usb_interface *intf,
request_queue_t *q;
struct gendisk *disk;
int rc;
+ int i;
rc = -ENOMEM;
if ((sc = kmalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL)
@@ -1879,7 +1912,11 @@ static int ub_probe(struct usb_interface *intf,
* has to succeed, so we clear checks with an additional one here.
 * In any case it's not our business how revalidation is implemented.
*/
- ub_sync_tur(sc);
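+	/*
+	 * ub_sync_tur() now returns the sense key on -EIO (see above);
+	 * key 0x6 is UNIT ATTENTION, which a freshly attached device
+	 * typically reports once, so give it a few tries.
+	 */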
+ for (i = 0; i < 3; i++) { /* Retries for benh's key */
+ if ((rc = ub_sync_tur(sc)) <= 0) break;
+ if (rc != 0x6) break;
+ msleep(10);
+ }
sc->removable = 1; /* XXX Query this from the device */
@@ -1915,7 +1952,7 @@ static int ub_probe(struct usb_interface *intf,
blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
// blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
blk_queue_max_sectors(q, UB_MAX_SECTORS);
- // blk_queue_hardsect_size(q, xxxxx);
+ blk_queue_hardsect_size(q, sc->capacity.bsize);
/*
* This is a serious infraction, caused by a deficiency in the
@@ -2006,17 +2043,6 @@ static void ub_disconnect(struct usb_interface *intf)
blk_cleanup_queue(q);
/*
- * If we zero disk->private_data BEFORE put_disk, we have to check
- * for NULL all over the place in open, release, check_media and
- * revalidate, because the block level semaphore is well inside the
- * put_disk. But we cannot zero after the call, because *disk is gone.
- * The sd.c is blatantly racy in this area.
- */
- /* disk->private_data = NULL; */
- put_disk(disk);
- sc->disk = NULL;
-
- /*
* We really expect blk_cleanup_queue() to wait, so no amount
 * of paranoia is too much.
*
@@ -2035,6 +2061,13 @@ static void ub_disconnect(struct usb_interface *intf)
spin_unlock_irqrestore(&sc->lock, flags);
/*
+	 * There is virtually no chance that another CPU is still running the
+	 * timer this long after ub_urb_complete should have called del_timer,
+	 * provided the HCD did not forget to deliver a callback on unlink.
+ */
+ del_timer_sync(&sc->work_timer);
+
+ /*
* At this point there must be no commands coming from anyone
* and no URBs left in transit.
*/