Vastly improve the error handling in the case of a read/write error

that occurs during a reconstruction.  We go from zero error handling
and likely panicking if something goes amiss, to gracefully bailing and
leaving the system in the best, usable state possible.

- introduce rf_DrainReconEventQueue() to allow easy cleaning of the
reconstruction event queue

- change how we clean up the floating recon buffers in
rf_FreeReconControl().  Detect the end of the list rather
than traversing according to a count.

- keep track of the number of pending reconstruction writes.  In the
event of a read error, use this to wait long enough for the pending
writes to (hopefully) drain.

- more cleanup is still needed on this code, but I didn't want to
start mixing major functional changes with minor cleanups.

XXX: There is a known issue with pool items left outstanding due to
the IO failure, and this can show up in the form of a panic at the
tail end of a shutdown.  This problem is much less severe than before
these changes, and the hope/plan is that this problem will go away
once this code gets overhauled again.
This commit is contained in:
oster 2005-02-05 23:32:43 +00:00
parent 1205cb9e5b
commit c38bce14f6
5 changed files with 308 additions and 57 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Exp $ */
/* $NetBSD: rf_reconstruct.c,v 1.82 2005/02/05 23:32:43 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -33,7 +33,7 @@
************************************************************/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Exp $");
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.82 2005/02/05 23:32:43 oster Exp $");
#include <sys/time.h>
#include <sys/buf.h>
@ -94,6 +94,11 @@ __KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Ex
#endif /* RF_DEBUG_RECON */
#define RF_RECON_DONE_READS 1
#define RF_RECON_READ_ERROR 2
#define RF_RECON_WRITE_ERROR 3
#define RF_RECON_READ_STOPPED 4
#define RF_MAX_FREE_RECONBUFFER 32
#define RF_MIN_FREE_RECONBUFFER 16
@ -320,6 +325,16 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t col)
rf_update_component_labels(raidPtr,
RF_NORMAL_COMPONENT_UPDATE);
} else {
/* Reconstruct failed. */
RF_LOCK_MUTEX(raidPtr->mutex);
/* Failed disk goes back to "failed" status */
raidPtr->Disks[col].status = rf_ds_failed;
/* Spare disk goes back to "spare" status. */
spareDiskPtr->status = rf_ds_spare;
RF_UNLOCK_MUTEX(raidPtr->mutex);
}
return (rc);
}
@ -496,10 +511,6 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t col)
reconDesc->maxReconExecTicks = 0;
rc = rf_ContinueReconstructFailedDisk(reconDesc);
RF_LOCK_MUTEX(raidPtr->mutex);
raidPtr->reconInProgress--;
RF_UNLOCK_MUTEX(raidPtr->mutex);
if (!rc) {
RF_LOCK_MUTEX(raidPtr->mutex);
/* Need to set these here, as at this point it'll be claiming
@ -536,8 +547,18 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t col)
rf_update_component_labels(raidPtr,
RF_NORMAL_COMPONENT_UPDATE);
} else {
/* Reconstruct-in-place failed. Disk goes back to
"failed" status, regardless of what it was before. */
RF_LOCK_MUTEX(raidPtr->mutex);
raidPtr->Disks[col].status = rf_ds_failed;
RF_UNLOCK_MUTEX(raidPtr->mutex);
}
RF_LOCK_MUTEX(raidPtr->mutex);
raidPtr->reconInProgress--;
RF_UNLOCK_MUTEX(raidPtr->mutex);
RF_SIGNAL_COND(raidPtr->waitForReconCond);
return (rc);
}
@ -552,9 +573,12 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
RF_ReconMap_t *mapPtr;
RF_ReconCtrl_t *tmp_reconctrl;
RF_ReconEvent_t *event;
RF_CallbackDesc_t *p;
struct timeval etime, elpsd;
unsigned long xor_s, xor_resid_us;
int i, ds;
int status;
int recon_error, write_error;
raidPtr->accumXorTimeUs = 0;
#if RF_ACC_TRACE > 0
@ -609,19 +633,65 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
* they've completed all work */
mapPtr = raidPtr->reconControl->reconMap;
recon_error = 0;
write_error = 0;
while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
event = rf_GetNextReconEvent(reconDesc);
RF_ASSERT(event);
if (ProcessReconEvent(raidPtr, event))
status = ProcessReconEvent(raidPtr, event);
/* the normal case is that a read completes, and all is well. */
if (status == RF_RECON_DONE_READS) {
reconDesc->numDisksDone++;
} else if ((status == RF_RECON_READ_ERROR) ||
(status == RF_RECON_WRITE_ERROR)) {
/* an error was encountered while reconstructing...
Pretend we've finished this disk.
*/
recon_error = 1;
raidPtr->reconControl->error = 1;
/* bump the numDisksDone count for reads,
but not for writes */
if (status == RF_RECON_READ_ERROR)
reconDesc->numDisksDone++;
/* write errors are special -- when we are
done dealing with the reads that are
finished, we don't want to wait for any
writes */
if (status == RF_RECON_WRITE_ERROR)
write_error = 1;
} else if (status == RF_RECON_READ_STOPPED) {
/* count this component as being "done" */
reconDesc->numDisksDone++;
}
if (recon_error) {
/* make sure any stragglers are woken up so that
their threads will complete, and we can get out
of here with all IO processed */
while (raidPtr->reconControl->headSepCBList) {
p = raidPtr->reconControl->headSepCBList;
raidPtr->reconControl->headSepCBList = p->next;
p->next = NULL;
rf_CauseReconEvent(raidPtr, p->col, NULL, RF_REVENT_HEADSEPCLEAR);
rf_FreeCallbackDesc(p);
}
}
raidPtr->reconControl->numRUsTotal =
mapPtr->totalRUs;
raidPtr->reconControl->numRUsComplete =
mapPtr->totalRUs -
rf_UnitsLeftToReconstruct(mapPtr);
#if RF_DEBUG_RECON
raidPtr->reconControl->percentComplete =
(raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
@ -637,19 +707,107 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
}
/* at this point all the reads have completed. We now wait
* for any pending writes to complete, and then we're done */
while (rf_UnitsLeftToReconstruct(raidPtr->reconControl->reconMap) > 0) {
while (!recon_error && rf_UnitsLeftToReconstruct(raidPtr->reconControl->reconMap) > 0) {
event = rf_GetNextReconEvent(reconDesc);
RF_ASSERT(event);
(void) ProcessReconEvent(raidPtr, event); /* ignore return code */
status = ProcessReconEvent(raidPtr, event);
if (status == RF_RECON_WRITE_ERROR) {
recon_error = 1;
raidPtr->reconControl->error = 1;
/* an error was encountered at the very end... bail */
} else {
#if RF_DEBUG_RECON
raidPtr->reconControl->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
if (rf_prReconSched) {
rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
}
raidPtr->reconControl->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
if (rf_prReconSched) {
rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
}
#endif
}
}
if (recon_error) {
/* we've encountered an error in reconstructing. */
printf("raid%d: reconstruction failed.\n", raidPtr->raidid);
/* we start by blocking IO to the RAID set. */
rf_SuspendNewRequestsAndWait(raidPtr);
RF_LOCK_MUTEX(raidPtr->mutex);
/* mark set as being degraded, rather than
rf_rs_reconstructing as we were before the problem.
After this is done we can update status of the
component disks without worrying about someone
trying to read from a failed component.
*/
raidPtr->status = rf_rs_degraded;
RF_UNLOCK_MUTEX(raidPtr->mutex);
/* resume IO */
rf_ResumeNewRequests(raidPtr);
/* At this point there are two cases:
1) If we've experienced a read error, then we've
already waited for all the reads we're going to get,
and we just need to wait for the writes.
2) If we've experienced a write error, we've also
already waited for all the reads to complete,
but there is little point in waiting for the writes --
when they do complete, they will just be ignored.
So we just wait for writes to complete if we didn't have a
write error.
*/
if (!write_error) {
/* wait for writes to complete */
while (raidPtr->reconControl->pending_writes > 0) {
event = rf_GetNextReconEvent(reconDesc);
status = ProcessReconEvent(raidPtr, event);
if (status == RF_RECON_WRITE_ERROR) {
raidPtr->reconControl->error = 1;
/* an error was encountered at the very end... bail.
This will be very bad news for the user, since
at this point there will have been a read error
on one component, and a write error on another!
*/
break;
}
}
}
/* cleanup */
/* drain the event queue - after waiting for the writes above,
there shouldn't be much (if anything!) left in the queue. */
rf_DrainReconEventQueue(reconDesc);
/* XXX As much as we'd like to free the recon control structure
and the reconDesc, we have no way of knowing if/when those will
be touched by IO that has yet to occur. It is rather poor to be
basically causing a 'memory leak' here, but there doesn't seem to be
a cleaner alternative at this time. Perhaps when the reconstruct code
gets a makeover this problem will go away.
*/
#if 0
rf_FreeReconControl(raidPtr);
#endif
#if RF_ACC_TRACE > 0
RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t));
#endif
/* XXX see comment above */
#if 0
FreeReconDesc(reconDesc);
#endif
return (1);
}
/* Success: mark the dead disk as reconstructed. We quiesce
@ -683,7 +841,6 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
(int) raidPtr->reconControl->starttime.tv_sec,
(int) raidPtr->reconControl->starttime.tv_usec,
(int) etime.tv_sec, (int) etime.tv_usec);
#if RF_RECON_STATS > 0
printf("raid%d: Total head-sep stall count was %d\n",
raidPtr->raidid, (int) reconDesc->hsStallCount);
@ -695,11 +852,10 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
FreeReconDesc(reconDesc);
return (0);
}
/*****************************************************************************
* do the right thing upon each reconstruction event.
* returns nonzero if and only if there is nothing left unread on the
* indicated disk
*****************************************************************************/
static int
ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
@ -708,6 +864,8 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
RF_ReconBuffer_t *rbuf;
RF_SectorCount_t sectorsPerRU;
retcode = RF_RECON_READ_STOPPED;
Dprintf1("RECON: ProcessReconEvent type %d\n", event->type);
switch (event->type) {
@ -720,10 +878,12 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
Dprintf1("RECON: submitblocked=%d\n", submitblocked);
if (!submitblocked)
retcode = IssueNextReadRequest(raidPtr, event->col);
if (!raidPtr->reconControl->error) {
submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
Dprintf1("RECON: submitblocked=%d\n", submitblocked);
if (!submitblocked)
retcode = IssueNextReadRequest(raidPtr, event->col);
}
break;
/* a write I/O has completed */
@ -742,6 +902,10 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
rf_RemoveFromActiveReconTable(raidPtr, rbuf->parityStripeID, rbuf->which_ru);
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
raidPtr->reconControl->pending_writes--;
RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
while(raidPtr->reconControl->rb_lock) {
@ -763,47 +927,59 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
rf_FreeReconBuffer(rbuf);
else
RF_ASSERT(0);
retcode = 0;
break;
case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been
* cleared */
Dprintf1("RECON: BUFCLEAR EVENT: col %d\n", event->col);
submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg);
RF_ASSERT(!submitblocked); /* we wouldn't have gotten the
* BUFCLEAR event if we
* couldn't submit */
retcode = IssueNextReadRequest(raidPtr, event->col);
if (!raidPtr->reconControl->error) {
submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl->perDiskInfo[event->col].rbuf,
0, (int) (long) event->arg);
RF_ASSERT(!submitblocked); /* we wouldn't have gotten the
* BUFCLEAR event if we
* couldn't submit */
retcode = IssueNextReadRequest(raidPtr, event->col);
}
break;
case RF_REVENT_BLOCKCLEAR: /* A user-write reconstruction
* blockage has been cleared */
DDprintf1("RECON: BLOCKCLEAR EVENT: col %d\n", event->col);
retcode = TryToRead(raidPtr, event->col);
if (!raidPtr->reconControl->error) {
retcode = TryToRead(raidPtr, event->col);
}
break;
case RF_REVENT_HEADSEPCLEAR: /* A max-head-separation
* reconstruction blockage has been
* cleared */
Dprintf1("RECON: HEADSEPCLEAR EVENT: col %d\n", event->col);
retcode = TryToRead(raidPtr, event->col);
if (!raidPtr->reconControl->error) {
retcode = TryToRead(raidPtr, event->col);
}
break;
/* a buffer has become ready to write */
case RF_REVENT_BUFREADY:
Dprintf1("RECON: BUFREADY EVENT: col %d\n", event->col);
retcode = IssueNextWriteRequest(raidPtr);
if (!raidPtr->reconControl->error) {
retcode = IssueNextWriteRequest(raidPtr);
#if RF_DEBUG_RECON
if (rf_floatingRbufDebug) {
rf_CheckFloatingRbufCount(raidPtr, 1);
}
if (rf_floatingRbufDebug) {
rf_CheckFloatingRbufCount(raidPtr, 1);
}
#endif
}
break;
/* we need to skip the current RU entirely because it got
* recon'd while we were waiting for something else to happen */
case RF_REVENT_SKIP:
DDprintf1("RECON: SKIP EVENT: col %d\n", event->col);
retcode = IssueNextReadRequest(raidPtr, event->col);
if (!raidPtr->reconControl->error) {
retcode = IssueNextReadRequest(raidPtr, event->col);
}
break;
/* a forced-reconstruction read access has completed. Just
@ -812,21 +988,38 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
rbuf = (RF_ReconBuffer_t *) event->arg;
rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
DDprintf1("RECON: FORCEDREADDONE EVENT: col %d\n", event->col);
submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
RF_ASSERT(!submitblocked);
if (!raidPtr->reconControl->error) {
submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
RF_ASSERT(!submitblocked);
}
break;
/* A read I/O failed to complete */
case RF_REVENT_READ_FAILED:
/* fallthru to panic... */
retcode = RF_RECON_READ_ERROR;
break;
/* A write I/O failed to complete */
case RF_REVENT_WRITE_FAILED:
/* fallthru to panic... */
retcode = RF_RECON_WRITE_ERROR;
rbuf = (RF_ReconBuffer_t *) event->arg;
/* cleanup the disk queue data */
rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
/* At this point we're erroring out, badly, and floatingRbufs
may not even be valid. Rather than putting this back onto
the floatingRbufs list, just arrange for its immediate
destruction.
*/
rf_FreeReconBuffer(rbuf);
break;
/* a forced read I/O failed to complete */
case RF_REVENT_FORCEDREAD_FAILED:
/* fallthru to panic... */
retcode = RF_RECON_READ_ERROR;
break;
default:
RF_PANIC();
@ -851,9 +1044,6 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
* accessed. This allows us to easily retry when we're blocked by
* head separation or reconstruction-blockage events.
*
* returns nonzero if and only if there is nothing left unread on the
* indicated disk
*
*****************************************************************************/
static int
IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t col)
@ -883,7 +1073,7 @@ IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t col)
* parity stripe id */
if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) {
CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter));
return (1); /* finito! */
return (RF_RECON_DONE_READS); /* finito! */
}
/* find the disk offsets of the start of the parity
* stripe on both the current disk and the failed
@ -1199,6 +1389,9 @@ IssueNextWriteRequest(RF_Raid_t *raidPtr)
RF_ASSERT(req); /* XXX -- fix this -- XXX */
rbuf->arg = (void *) req;
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
raidPtr->reconControl->pending_writes++;
RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spCol], req, RF_IO_RECON_PRIORITY);
return (0);
@ -1216,7 +1409,16 @@ static int
ReconReadDoneProc(void *arg, int status)
{
RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
RF_Raid_t *raidPtr;
/* Detect that reconCtrl is no longer valid, and if that
is the case, bail without calling rf_CauseReconEvent().
There won't be anyone listening for this event anyway */
if (ctrl->reconCtrl == NULL)
return(0);
raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
if (status) {
printf("raid%d: Recon read failed!\n", raidPtr->raidid);
@ -1243,6 +1445,13 @@ ReconWriteDoneProc(void *arg, int status)
{
RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;
/* Detect that reconControl is no longer valid, and if that
is the case, bail without calling rf_CauseReconEvent().
There won't be anyone listening for this event anyway */
if (rbuf->raidPtr->reconControl == NULL)
return(0);
Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru);
if (status) {
printf("raid%d: Recon write failed!\n", rbuf->raidPtr->raidid);
@ -1541,6 +1750,13 @@ ForceReconReadDoneProc(void *arg, int status)
{
RF_ReconBuffer_t *rbuf = arg;
/* Detect that reconControl is no longer valid, and if that
is the case, bail without calling rf_CauseReconEvent().
There won't be anyone listening for this event anyway */
if (rbuf->raidPtr->reconControl == NULL)
return;
if (status) {
printf("raid%d: Forced recon read failed!\n", rbuf->raidPtr->raidid);
rf_CauseReconEvent(rbuf->raidPtr, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREAD_FAILED);

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_reconstruct.h,v 1.18 2004/11/15 17:16:28 oster Exp $ */
/* $NetBSD: rf_reconstruct.h,v 1.19 2005/02/05 23:32:44 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -129,6 +129,10 @@ struct RF_ReconCtrl_s {
int percentComplete;/* percentage completion of reconstruction */
int numRUsComplete; /* number of Reconstruction Units done */
int numRUsTotal; /* total number of Reconstruction Units */
int error; /* non-0 indicates that an error has
occurred during reconstruction, and
the reconstruction is in the process of
bailing out. */
/* reconstruction event queue */
RF_ReconEvent_t *eventQueue; /* queue of pending reconstruction
@ -143,6 +147,8 @@ struct RF_ReconCtrl_s {
int rb_lock; /* 1 if someone is mucking
with recon buffers,
0 otherwise */
int pending_writes; /* number of writes which
have not completed */
RF_ReconBuffer_t *floatingRbufs; /* available floating
* reconstruction buffers */
RF_ReconBuffer_t *committedRbufs; /* recon buffers that have

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_reconutil.c,v 1.23 2004/03/18 16:54:54 oster Exp $ */
/* $NetBSD: rf_reconutil.c,v 1.24 2005/02/05 23:32:44 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -31,7 +31,7 @@
********************************************/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_reconutil.c,v 1.23 2004/03/18 16:54:54 oster Exp $");
__KERNEL_RCSID(0, "$NetBSD: rf_reconutil.c,v 1.24 2005/02/05 23:32:44 oster Exp $");
#include <dev/raidframe/raidframevar.h>
@ -83,6 +83,8 @@ rf_MakeReconControl(RF_RaidReconDesc_t *reconDesc,
reconCtrlPtr->spareCol = scol;
reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU;
reconCtrlPtr->percentComplete = 0;
reconCtrlPtr->error = 0;
reconCtrlPtr->pending_writes = 0;
/* initialize each per-disk recon information structure */
for (i = 0; i < raidPtr->numCol; i++) {
@ -161,12 +163,14 @@ rf_FreeReconControl(RF_Raid_t *raidPtr)
for (i = 0; i < raidPtr->numCol; i++)
if (reconCtrlPtr->perDiskInfo[i].rbuf)
rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf);
for (i = 0; i < raidPtr->numFloatingReconBufs; i++) {
t = reconCtrlPtr->floatingRbufs;
RF_ASSERT(t);
t = reconCtrlPtr->floatingRbufs;
while (t) {
reconCtrlPtr->floatingRbufs = t->next;
rf_FreeReconBuffer(t);
t = reconCtrlPtr->floatingRbufs;
}
rf_FreeReconMap(reconCtrlPtr->reconMap);
rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable);
RF_Free(reconCtrlPtr->perDiskInfo,

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_revent.c,v 1.19 2004/11/15 17:16:28 oster Exp $ */
/* $NetBSD: rf_revent.c,v 1.20 2005/02/05 23:32:44 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.19 2004/11/15 17:16:28 oster Exp $");
__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.20 2005/02/05 23:32:44 oster Exp $");
#include <sys/errno.h>
@ -181,6 +181,31 @@ GetReconEventDesc(RF_RowCol_t col, void *arg, RF_Revent_t type)
return (t);
}
/*
 * rf_DrainReconEventQueue() -- discard every event still sitting on the
 * reconstruction event queue.  Intended for use when a reconstruction
 * has run into a problem and the queued events will never be processed.
 *
 * The queue is reached through reconDesc->raidPtr->reconControl.  While
 * holding eq_mutex, each event is unlinked from the head of eventQueue,
 * eq_count is decremented, and the descriptor is released with
 * rf_FreeReconEventDesc().  On return eventQueue is NULL.
 */
void
rf_DrainReconEventQueue(RF_RaidReconDesc_t *reconDesc)
{
	RF_ReconCtrl_t *ctrl = reconDesc->raidPtr->reconControl;
	RF_ReconEvent_t *head;

	RF_LOCK_MUTEX(ctrl->eq_mutex);
	for (head = ctrl->eventQueue; head != NULL; head = ctrl->eventQueue) {
		/* unlink the head element before handing it back */
		ctrl->eventQueue = head->next;
		head->next = NULL;
		ctrl->eq_count--;
		rf_FreeReconEventDesc(head);
	}
	RF_UNLOCK_MUTEX(ctrl->eq_mutex);
}
void
rf_FreeReconEventDesc(RF_ReconEvent_t *event)
{

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_revent.h,v 1.7 2004/11/15 17:16:28 oster Exp $ */
/* $NetBSD: rf_revent.h,v 1.8 2005/02/05 23:32:44 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -43,7 +43,7 @@ RF_ReconEvent_t *rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc);
void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t col,
void *arg, RF_Revent_t type);
void rf_DrainReconEventQueue(RF_RaidReconDesc_t *reconDesc);
void rf_FreeReconEventDesc(RF_ReconEvent_t * event);
#endif /* !_RF__RF_REVENT_H_ */