summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2004-03-15 15:16:02 -0800
committerLinus Torvalds <torvalds@ppc970.osdl.org>2004-03-15 15:16:02 -0800
commit4e9fbe87b0efa47f23628dfe334951382fe963fb (patch)
tree8abbad7dadaa9933e175e3ac40e861ed3eb8891c
parente5c539b8e8d3aa788d2738add7c85e6ff6f2163e (diff)
[PATCH] ide-scsi error handling fixes
From: Willem Riede <wrlk@riede.org> The patch revises the error handling in ide-scsi, fixing the scheduling while locked issues, and make it work properly, at least for me... Specific changes in this patch: - introduce idescsi_expiry, a timeout routine for the ide subsystem, which simply flags the fact that the command timed out, but postpones any other action until either the command still finishes on its own (unlikely?) or the scsi error handler kicks in; - introduce idescsi_atapi_error and idescsi_atapi_abort, error routines for the ide subsystem, which are modeled after those of ide-cd, but take only minimal effort to recover, leaving the heavy lifting for the scsi error handler; - rewrite (and rename for clarity) idescsi_eh_abort and idescsi_eh_error, the abort/error routines to be called by the scsi error handler -- this redesign should not have the scheduling while atomic problems of the old implementation. - move ide_cdrom_dump_status() from ide-cd.c to ide-lib.c as ide_dump_atapi_status() and both ide-cd and ide-scsi call it. - replaces BUG() by WARN_ON()/printk in the error handling code. - sets TASK_UNINTERRUPTIBLE before schedule_timeout() and moves the host unlock/lock around the while loop inside the loop in idescsi_eh_reset().
-rw-r--r--drivers/ide/ide-cd.c51
-rw-r--r--drivers/ide/ide-lib.c52
-rw-r--r--drivers/scsi/ide-scsi.c237
-rw-r--r--include/linux/ide.h1
4 files changed, 240 insertions, 101 deletions
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a1bdabd53740..6015ff37b8b8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -554,53 +554,6 @@ static void cdrom_queue_request_sense(ide_drive_t *drive,
(void) ide_do_drive_cmd(drive, rq, ide_preempt);
}
-
-/*
- * Error reporting, in human readable form (luxurious, but a memory hog).
- */
-byte ide_cdrom_dump_status (ide_drive_t *drive, const char *msg, byte stat)
-{
- unsigned long flags;
-
- atapi_status_t status;
- atapi_error_t error;
-
- status.all = stat;
- local_irq_set(flags);
- printk("%s: %s: status=0x%02x", drive->name, msg, stat);
-#if FANCY_STATUS_DUMPS
- printk(" { ");
- if (status.b.bsy)
- printk("Busy ");
- else {
- if (status.b.drdy) printk("DriveReady ");
- if (status.b.df) printk("DeviceFault ");
- if (status.b.dsc) printk("SeekComplete ");
- if (status.b.drq) printk("DataRequest ");
- if (status.b.corr) printk("CorrectedError ");
- if (status.b.idx) printk("Index ");
- if (status.b.check) printk("Error ");
- }
- printk("}");
-#endif /* FANCY_STATUS_DUMPS */
- printk("\n");
- if ((status.all & (status.b.bsy|status.b.check)) == status.b.check) {
- error.all = HWIF(drive)->INB(IDE_ERROR_REG);
- printk("%s: %s: error=0x%02x", drive->name, msg, error.all);
-#if FANCY_STATUS_DUMPS
- if (error.b.ili) printk("IllegalLengthIndication ");
- if (error.b.eom) printk("EndOfMedia ");
- if (error.b.abrt) printk("Aborted Command ");
- if (error.b.mcr) printk("MediaChangeRequested ");
- if (error.b.sense_key) printk("LastFailedSense 0x%02x ",
- error.b.sense_key);
-#endif /* FANCY_STATUS_DUMPS */
- printk("\n");
- }
- local_irq_restore(flags);
- return error.all;
-}
-
/*
* ide_error() takes action based on the error returned by the drive.
*/
@@ -609,7 +562,7 @@ ide_startstop_t ide_cdrom_error (ide_drive_t *drive, const char *msg, byte stat)
struct request *rq;
byte err;
- err = ide_cdrom_dump_status(drive, msg, stat);
+ err = ide_dump_atapi_status(drive, msg, stat);
if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
return ide_stopped;
/* retry only "normal" I/O: */
@@ -3412,7 +3365,7 @@ static ide_driver_t ide_cdrom_driver = {
.supports_dsc_overlap = 1,
.cleanup = ide_cdrom_cleanup,
.do_request = ide_do_rw_cdrom,
- .sense = ide_cdrom_dump_status,
+ .sense = ide_dump_atapi_status,
.error = ide_cdrom_error,
.abort = ide_cdrom_abort,
.capacity = ide_cdrom_capacity,
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index f670de4e8e8e..073c3fb82db3 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -447,3 +447,55 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate)
EXPORT_SYMBOL_GPL(ide_set_xfer_rate);
+/**
+ * ide_dump_atapi_status - print human readable atapi status
+ * @drive: drive that status applies to
+ * @msg: text message to print
+ * @stat: status byte to decode
+ *
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_atapi_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+ unsigned long flags;
+
+ atapi_status_t status;
+ atapi_error_t error;
+
+ status.all = stat;
+ local_irq_set(flags);
+ printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+ printk(" { ");
+ if (status.b.bsy)
+ printk("Busy ");
+ else {
+ if (status.b.drdy) printk("DriveReady ");
+ if (status.b.df) printk("DeviceFault ");
+ if (status.b.dsc) printk("SeekComplete ");
+ if (status.b.drq) printk("DataRequest ");
+ if (status.b.corr) printk("CorrectedError ");
+ if (status.b.idx) printk("Index ");
+ if (status.b.check) printk("Error ");
+ }
+ printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ if ((status.all & (status.b.bsy|status.b.check)) == status.b.check) {
+ error.all = HWIF(drive)->INB(IDE_ERROR_REG);
+ printk("%s: %s: error=0x%02x", drive->name, msg, error.all);
+#if FANCY_STATUS_DUMPS
+ if (error.b.ili) printk("IllegalLengthIndication ");
+ if (error.b.eom) printk("EndOfMedia ");
+ if (error.b.abrt) printk("Aborted Command ");
+ if (error.b.mcr) printk("MediaChangeRequested ");
+ if (error.b.sense_key) printk("LastFailedSense 0x%02x ",
+ error.b.sense_key);
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ }
+ local_irq_restore(flags);
+ return error.all;
+}
+
+EXPORT_SYMBOL(ide_dump_atapi_status);
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 000556e5b433..ba879f99a3fe 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -78,6 +78,7 @@ typedef struct idescsi_pc_s {
#define PC_DMA_IN_PROGRESS 0 /* 1 while DMA in progress */
#define PC_WRITING 1 /* Data direction */
#define PC_TRANSFORM 2 /* transform SCSI commands */
+#define PC_TIMEDOUT 3 /* command timed out */
#define PC_DMA_OK 4 /* Use DMA */
/*
@@ -307,6 +308,41 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
return ide_do_drive_cmd(drive, rq, ide_preempt);
}
+ide_startstop_t idescsi_atapi_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+ struct request *rq;
+ byte err;
+
+ err = ide_dump_atapi_status(drive, msg, stat);
+
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+
+ if (HWIF(drive)->INB(IDE_STATUS_REG) & (BUSY_STAT|DRQ_STAT))
+ /* force an abort */
+ HWIF(drive)->OUTB(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG);
+
+ rq->errors++;
+ DRIVER(drive)->end_request(drive, 0, 0);
+ return ide_stopped;
+}
+
+ide_startstop_t idescsi_atapi_abort (ide_drive_t *drive, const char *msg)
+{
+ struct request *rq;
+
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "idescsi_atapi_abort called for %lu\n",
+ ((idescsi_pc_t *) rq->special)->scsi_cmd->serial_number);
+#endif
+ rq->errors |= ERROR_MAX;
+ DRIVER(drive)->end_request(drive, 0, 0);
+ return ide_stopped;
+}
+
static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
{
idescsi_scsi_t *scsi = drive_to_idescsi(drive);
@@ -334,7 +370,13 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
kfree(rq);
pc = opc;
rq = pc->rq;
- pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
+ pc->scsi_cmd->result = (CHECK_CONDITION << 1) |
+ ((test_bit(PC_TIMEDOUT, &pc->flags)?DID_TIME_OUT:DID_OK) << 16);
+ } else if (test_bit(PC_TIMEDOUT, &pc->flags)) {
+ if (log)
+ printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n",
+ drive->name, pc->scsi_cmd->serial_number);
+ pc->scsi_cmd->result = DID_TIME_OUT << 16;
} else if (rq->errors >= ERROR_MAX) {
pc->scsi_cmd->result = DID_ERROR << 16;
if (log)
@@ -374,6 +416,19 @@ static inline unsigned long get_timeout(idescsi_pc_t *pc)
return IDE_MAX(WAIT_CMD, pc->timeout - jiffies);
}
+static int idescsi_expiry(ide_drive_t *drive)
+{
+ idescsi_scsi_t *scsi = drive->driver_data;
+ idescsi_pc_t *pc = scsi->pc;
+
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "idescsi_expiry called for %lu at %lu\n", pc->scsi_cmd->serial_number, jiffies);
+#endif
+ set_bit(PC_TIMEDOUT, &pc->flags);
+
+ return 0; /* we do not want the ide subsystem to retry */
+}
+
/*
* Our interrupt handler.
*/
@@ -393,6 +448,15 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
printk (KERN_INFO "ide-scsi: Reached idescsi_pc_intr interrupt handler\n");
#endif /* IDESCSI_DEBUG_LOG */
+ if (test_bit(PC_TIMEDOUT, &pc->flags)){
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "idescsi_pc_intr: got timed out packet %lu at %lu\n",
+ pc->scsi_cmd->serial_number, jiffies);
+#endif
+ /* end this request now - scsi should retry it*/
+ idescsi_end_request (drive, 1, 0);
+ return ide_stopped;
+ }
if (test_and_clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
#if IDESCSI_DEBUG_LOG
printk ("ide-scsi: %s: DMA complete\n", drive->name);
@@ -442,7 +506,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
pc->actually_transferred += temp;
pc->current_position += temp;
idescsi_discard_data(drive, bcount.all - temp);
- ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL);
+ ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
return ide_started;
}
#if IDESCSI_DEBUG_LOG
@@ -467,7 +531,8 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
pc->actually_transferred += bcount.all;
pc->current_position += bcount.all;
- ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL); /* And set the interrupt handler again */
+ /* And set the interrupt handler again */
+ ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
return ide_started;
}
@@ -492,7 +557,7 @@ static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive)
if (HWGROUP(drive)->handler != NULL)
BUG();
/* Set the interrupt routine */
- ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), NULL);
+ ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
/* Send the actual packet */
atapi_output_bytes(drive, scsi->pc->c, 12);
if (test_bit (PC_DMA_OK, &pc->flags)) {
@@ -540,7 +605,7 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
if (HWGROUP(drive)->handler != NULL)
BUG();
ide_set_handler(drive, &idescsi_transfer_pc,
- get_timeout(pc), NULL);
+ get_timeout(pc), idescsi_expiry);
/* Issue the packet command */
HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG);
return ide_started;
@@ -633,6 +698,8 @@ static ide_driver_t idescsi_driver = {
.cleanup = idescsi_cleanup,
.do_request = idescsi_do_request,
.end_request = idescsi_end_request,
+ .error = idescsi_atapi_error,
+ .abort = idescsi_atapi_abort,
.drives = LIST_HEAD_INIT(idescsi_driver.drives),
};
@@ -852,66 +919,132 @@ abort:
return 1;
}
-static int idescsi_abort (Scsi_Cmnd *cmd)
+static int idescsi_eh_abort (Scsi_Cmnd *cmd)
{
- int countdown = 8;
- unsigned long flags;
- idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
- ide_drive_t *drive = scsi->drive;
+ idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
+ ide_drive_t *drive = scsi->drive;
+ int busy;
+ int ret = FAILED;
- printk (KERN_ERR "ide-scsi: abort called for %lu\n", cmd->serial_number);
- while (countdown--) {
- /* is cmd active?
- * need to lock so this stuff doesn't change under us */
- spin_lock_irqsave(&ide_lock, flags);
- if (scsi->pc && scsi->pc->scsi_cmd &&
- scsi->pc->scsi_cmd->serial_number == cmd->serial_number) {
- /* yep - let's give it some more time -
- * we can do that, we're in _our_ error kernel thread */
- spin_unlock_irqrestore(&ide_lock, flags);
- scsi_sleep(HZ);
- continue;
- }
- /* no, but is it queued in the ide subsystem? */
- if (elv_queue_empty(drive->queue)) {
- spin_unlock_irqrestore(&ide_lock, flags);
- return SUCCESS;
- }
- spin_unlock_irqrestore(&ide_lock, flags);
- schedule_timeout(HZ/10);
+ /* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */
+
+ if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
+ printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number);
+
+ if (!drive) {
+ printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n");
+ WARN_ON(1);
+ goto no_drive;
}
- return FAILED;
+
+ /* First give it some more time, how much is "right" is hard to say :-( */
+
+ busy = ide_wait_not_busy(HWIF(drive), 100); /* FIXME - uses mdelay which causes latency? */
+ if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
+ printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
+
+ spin_lock_irq(&ide_lock);
+
+ /* If there is no pc running we're done (our interrupt took care of it) */
+ if (!scsi->pc) {
+ ret = SUCCESS;
+ goto ide_unlock;
+ }
+
+ /* It's somewhere in flight. Does ide subsystem agree? */
+ if (scsi->pc->scsi_cmd->serial_number == cmd->serial_number && !busy &&
+ elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != scsi->pc->rq) {
+ /*
+ * FIXME - not sure this condition can ever occur
+ */
+ printk (KERN_ERR "ide-scsi: cmd aborted!\n");
+
+ idescsi_free_bio(scsi->pc->rq->bio);
+ if (scsi->pc->rq->flags & REQ_SENSE)
+ kfree(scsi->pc->buffer);
+ kfree(scsi->pc->rq);
+ kfree(scsi->pc);
+ scsi->pc = NULL;
+
+ ret = SUCCESS;
+ }
+
+ide_unlock:
+ spin_unlock_irq(&ide_lock);
+no_drive:
+ if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
+ printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
+
+ return ret;
}
-static int idescsi_reset (Scsi_Cmnd *cmd)
+static int idescsi_eh_reset (Scsi_Cmnd *cmd)
{
- unsigned long flags;
struct request *req;
- idescsi_scsi_t *idescsi = scsihost_to_idescsi(cmd->device->host);
- ide_drive_t *drive = idescsi->drive;
+ idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
+ ide_drive_t *drive = scsi->drive;
+ int ready = 0;
+ int ret = SUCCESS;
+
+ /* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. */
- printk (KERN_ERR "ide-scsi: reset called for %lu\n", cmd->serial_number);
- /* first null the handler for the drive and let any process
- * doing IO (on another CPU) run to (partial) completion
- * the lock prevents processing new requests */
- spin_lock_irqsave(&ide_lock, flags);
- while (HWGROUP(drive)->handler) {
- HWGROUP(drive)->handler = NULL;
- schedule_timeout(1);
+ if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
+ printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number);
+
+ if (!drive) {
+ printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n");
+ WARN_ON(1);
+ return FAILED;
}
+
+ spin_lock_irq(&ide_lock);
+
+ if (!scsi->pc || (req = scsi->pc->rq) != HWGROUP(drive)->rq || !HWGROUP(drive)->handler) {
+ printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
+ spin_unlock(&ide_lock);
+ return FAILED;
+ }
+
+ /* kill current request */
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+ idescsi_free_bio(req->bio);
+ if (req->flags & REQ_SENSE)
+ kfree(scsi->pc->buffer);
+ kfree(scsi->pc);
+ scsi->pc = NULL;
+ kfree(req);
+
/* now nuke the drive queue */
while ((req = elv_next_request(drive->queue))) {
blkdev_dequeue_request(req);
end_that_request_last(req);
}
- /* FIXME - this will probably leak memory */
+
HWGROUP(drive)->rq = NULL;
- if (drive_to_idescsi(drive))
- drive_to_idescsi(drive)->pc = NULL;
- spin_unlock_irqrestore(&ide_lock, flags);
- /* finally, reset the drive (and its partner on the bus...) */
- ide_do_reset (drive);
- return SUCCESS;
+ HWGROUP(drive)->handler = NULL;
+ HWGROUP(drive)->busy = 1; /* will set this to zero when ide reset finished */
+ spin_unlock_irq(&ide_lock);
+
+ ide_do_reset(drive);
+
+ /* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */
+
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(cmd->device->host->host_lock);
+ schedule_timeout(HZ/20);
+ spin_lock_irq(cmd->device->host->host_lock);
+ } while ( HWGROUP(drive)->handler );
+
+ ready = drive_is_ready(drive);
+ HWGROUP(drive)->busy--;
+ if (!ready) {
+ printk (KERN_ERR "ide-scsi: reset failed!\n");
+ ret = FAILED;
+ }
+
+ return ret;
}
static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev,
@@ -935,8 +1068,8 @@ static Scsi_Host_Template idescsi_template = {
.slave_configure = idescsi_slave_configure,
.ioctl = idescsi_ioctl,
.queuecommand = idescsi_queue,
- .eh_abort_handler = idescsi_abort,
- .eh_device_reset_handler = idescsi_reset,
+ .eh_abort_handler = idescsi_eh_abort,
+ .eh_host_reset_handler = idescsi_eh_reset,
.bios_param = idescsi_bios,
.can_queue = 40,
.this_id = -1,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ccb9368c6efd..1ad87c679105 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1612,6 +1612,7 @@ extern int ide_dma_enable(ide_drive_t *drive);
extern char *ide_xfer_verbose(u8 xfer_rate);
extern void ide_toggle_bounce(ide_drive_t *drive, int on);
extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate);
+extern byte ide_dump_atapi_status(ide_drive_t *drive, const char *msg, byte stat);
typedef struct ide_pio_timings_s {
int setup_time; /* Address setup (ns) minimum */