If we see a RF_RECON_WRITE_ERROR event we know a write has finished and

we need to account for that.  Failure to do so means we can end up
waiting forever for writes we think are outstanding, but which have
already completed.

Addresses the RAIDframe part of PR#40569.  Thanks to Matthias Scheler
for reporting the issue and verifying the fix.
This commit is contained in:
oster 2009-02-11 23:54:10 +00:00
parent cfd7bc0451
commit f17e8d67c4
1 changed files with 13 additions and 4 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $ */
/* $NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -33,7 +33,7 @@
************************************************************/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $");
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $");
#include <sys/param.h>
#include <sys/time.h>
@ -676,8 +676,10 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
done dealing with the reads that are
finished, we don't want to wait for any
writes */
if (status == RF_RECON_WRITE_ERROR)
if (status == RF_RECON_WRITE_ERROR) {
write_error = 1;
num_writes++;
}
} else if (status == RF_RECON_READ_STOPPED) {
/* count this component as being "done" */
@ -718,12 +720,13 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
status = ProcessReconEvent(raidPtr, event);
if (status == RF_RECON_WRITE_ERROR) {
num_writes++;
recon_error = 1;
raidPtr->reconControl->error = 1;
/* an error was encountered at the very end... bail */
} else if (status == RF_RECON_WRITE_DONE) {
num_writes++;
}
} /* else it's something else, and we don't care */
}
if (recon_error ||
(raidPtr->reconControl->lastPSID == lastPSID)) {
@ -1054,6 +1057,12 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
case RF_REVENT_WRITE_FAILED:
retcode = RF_RECON_WRITE_ERROR;
/* This is an error, but it was a pending write.
Account for it. */
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
raidPtr->reconControl->pending_writes--;
RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
rbuf = (RF_ReconBuffer_t *) event->arg;
/* cleanup the disk queue data */