Vastly improve the error handling in the case of a read/write error

that occurs during a reconstruction. We go from zero error handling and likely panicking if something goes amiss, to gracefully bailing and leaving the system in the best usable state possible. - introduce rf_DrainReconEventQueue() to allow easy cleaning of the reconstruction event queue - change how we clean up the floating recon buffers in rf_FreeReconControl(). Detect the end of the list rather than traversing according to a count. - keep track of the number of pending reconstruction writes. In the event of a read error, use this to wait long enough for the pending writes to (hopefully) drain. - more cleanup is still needed on this code, but I didn't want to start mixing major functional changes with minor cleanups. XXX: There is a known issue with pool items left outstanding due to the IO failure, and this can show up in the form of a panic at the tail end of a shutdown. This problem is much less severe than before these changes, and the hope/plan is that this problem will go away once this code gets overhauled again.
2005-02-05 23:32:43 +00:00 · 2005-02-05 23:32:43 +00:00 · c38bce14f6
parent 1205cb9e5b
commit c38bce14f6
5 changed files with 308 additions and 57 deletions
--- a/sys/dev/raidframe/rf_reconstruct.c
+++ b/sys/dev/raidframe/rf_reconstruct.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Exp $	*/
+/*	$NetBSD: rf_reconstruct.c,v 1.82 2005/02/05 23:32:43 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -33,7 +33,7 @@
 ************************************************************/

 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.82 2005/02/05 23:32:43 oster Exp $");

 #include <sys/time.h>
 #include <sys/buf.h>
@ -94,6 +94,11 @@ __KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.81 2005/01/22 02:24:31 oster Ex

 #endif /* RF_DEBUG_RECON */

+#define RF_RECON_DONE_READS   1
+#define RF_RECON_READ_ERROR   2
+#define RF_RECON_WRITE_ERROR  3
+#define RF_RECON_READ_STOPPED 4
+
 #define RF_MAX_FREE_RECONBUFFER 32
 #define RF_MIN_FREE_RECONBUFFER 16

@ -320,6 +325,16 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t col)
 		rf_update_component_labels(raidPtr, 
 					   RF_NORMAL_COMPONENT_UPDATE);

+	} else {
+		/* Reconstruct failed. */
+
+		RF_LOCK_MUTEX(raidPtr->mutex);
+		/* Failed disk goes back to "failed" status */
+		raidPtr->Disks[col].status = rf_ds_failed;
+
+		/* Spare disk goes back to "spare" status. */
+		spareDiskPtr->status = rf_ds_spare;
+		RF_UNLOCK_MUTEX(raidPtr->mutex);
 	}
 	return (rc);
 }
@ -496,10 +511,6 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t col)
 	reconDesc->maxReconExecTicks = 0;
 	rc = rf_ContinueReconstructFailedDisk(reconDesc);
 	
-	RF_LOCK_MUTEX(raidPtr->mutex);
-	raidPtr->reconInProgress--;
-	RF_UNLOCK_MUTEX(raidPtr->mutex);
-	
 	if (!rc) {
 		RF_LOCK_MUTEX(raidPtr->mutex);
 		/* Need to set these here, as at this point it'll be claiming
@ -536,8 +547,18 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t col)

 		rf_update_component_labels(raidPtr, 
 					   RF_NORMAL_COMPONENT_UPDATE);
-
+	} else {
+		/* Reconstruct-in-place failed.  Disk goes back to
+		   "failed" status, regardless of what it was before.  */
+		RF_LOCK_MUTEX(raidPtr->mutex);
+		raidPtr->Disks[col].status = rf_ds_failed;
+		RF_UNLOCK_MUTEX(raidPtr->mutex);
 	}
+
+	RF_LOCK_MUTEX(raidPtr->mutex);
+	raidPtr->reconInProgress--;
+	RF_UNLOCK_MUTEX(raidPtr->mutex);
+	
 	RF_SIGNAL_COND(raidPtr->waitForReconCond);
 	return (rc);
 }
@ -552,9 +573,12 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
 	RF_ReconMap_t *mapPtr;
 	RF_ReconCtrl_t *tmp_reconctrl;
 	RF_ReconEvent_t *event;
+	RF_CallbackDesc_t *p;
 	struct timeval etime, elpsd;
 	unsigned long xor_s, xor_resid_us;
 	int     i, ds;
+	int status;
+	int recon_error, write_error;

 	raidPtr->accumXorTimeUs = 0;
 #if RF_ACC_TRACE > 0
@ -609,19 +633,65 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
 	 * they've completed all work */

 	mapPtr = raidPtr->reconControl->reconMap;
-	
+	recon_error = 0;
+	write_error = 0;
+
 	while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
 		
 		event = rf_GetNextReconEvent(reconDesc);
 		RF_ASSERT(event);
-		
-		if (ProcessReconEvent(raidPtr, event))
+
+		status = ProcessReconEvent(raidPtr, event);
+
+		/* the normal case is that a read completes, and all is well. */
+		if (status == RF_RECON_DONE_READS) {
 			reconDesc->numDisksDone++;
+		} else if ((status == RF_RECON_READ_ERROR) ||
+			   (status == RF_RECON_WRITE_ERROR)) {
+			/* an error was encountered while reconstructing... 
+			   Pretend we've finished this disk. 
+			*/
+			recon_error = 1;
+			raidPtr->reconControl->error = 1;
+
+			/* bump the numDisksDone count for reads, 
+			   but not for writes */
+			if (status == RF_RECON_READ_ERROR)
+				reconDesc->numDisksDone++;
+
+			/* write errors are special -- when we are
+			   done dealing with the reads that are
+			   finished, we don't want to wait for any
+			   writes */
+			if (status == RF_RECON_WRITE_ERROR)
+				write_error = 1;
+
+		} else if (status == RF_RECON_READ_STOPPED) {
+			/* count this component as being "done" */
+			reconDesc->numDisksDone++;
+		}
+
+		if (recon_error) {
+
+			/* make sure any stragglers are woken up so that
+			   their threads will complete, and we can get out
+			   of here with all IO processed */
+
+			while (raidPtr->reconControl->headSepCBList) {
+				p = raidPtr->reconControl->headSepCBList;
+				raidPtr->reconControl->headSepCBList = p->next;
+				p->next = NULL;
+				rf_CauseReconEvent(raidPtr, p->col, NULL, RF_REVENT_HEADSEPCLEAR);
+				rf_FreeCallbackDesc(p);
+			}
+		}
+
 		raidPtr->reconControl->numRUsTotal = 
 			mapPtr->totalRUs;
 		raidPtr->reconControl->numRUsComplete = 
 			mapPtr->totalRUs - 
 			rf_UnitsLeftToReconstruct(mapPtr);
+
 #if RF_DEBUG_RECON
 		raidPtr->reconControl->percentComplete = 
 			(raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
@ -637,19 +707,107 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
 	}
 	/* at this point all the reads have completed.  We now wait
 	 * for any pending writes to complete, and then we're done */
-	
-	while (rf_UnitsLeftToReconstruct(raidPtr->reconControl->reconMap) > 0) {
+
+	while (!recon_error && rf_UnitsLeftToReconstruct(raidPtr->reconControl->reconMap) > 0) {
 		
 		event = rf_GetNextReconEvent(reconDesc);
 		RF_ASSERT(event);
-		
-		(void) ProcessReconEvent(raidPtr, event);	/* ignore return code */
+
+		status = ProcessReconEvent(raidPtr, event);
+		if (status == RF_RECON_WRITE_ERROR) {
+			recon_error = 1;
+			raidPtr->reconControl->error = 1; 
+			/* an error was encountered at the very end... bail */
+		} else {
 #if RF_DEBUG_RECON
-		raidPtr->reconControl->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
-		if (rf_prReconSched) {
-			rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
-		}
+			raidPtr->reconControl->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);
+			if (rf_prReconSched) {
+				rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
+			}
 #endif
+		}
+	}
+
+	if (recon_error) {
+		/* we've encountered an error in reconstructing. */
+		printf("raid%d: reconstruction failed.\n", raidPtr->raidid);
+		
+		/* we start by blocking IO to the RAID set. */
+		rf_SuspendNewRequestsAndWait(raidPtr);
+	
+		RF_LOCK_MUTEX(raidPtr->mutex);
+		/* mark set as being degraded, rather than
+		   rf_rs_reconstructing as we were before the problem.
+		   After this is done we can update status of the
+		   component disks without worrying about someone
+		   trying to read from a failed component.
+		*/
+		raidPtr->status = rf_rs_degraded;
+		RF_UNLOCK_MUTEX(raidPtr->mutex);
+		
+		/* resume IO */
+		rf_ResumeNewRequests(raidPtr);	
+	
+		/* At this point there are two cases:
+		   1) If we've experienced a read error, then we've
+		   already waited for all the reads we're going to get,
+		   and we just need to wait for the writes.
+
+		   2) If we've experienced a write error, we've also
+		   already waited for all the reads to complete,
+		   but there is little point in waiting for the writes --
+		   when they do complete, they will just be ignored.
+
+		   So we just wait for writes to complete if we didn't have a 
+		   write error.
+		*/
+
+		if (!write_error) {
+			/* wait for writes to complete */
+			while (raidPtr->reconControl->pending_writes > 0) {
+				event = rf_GetNextReconEvent(reconDesc);
+				status = ProcessReconEvent(raidPtr, event);
+
+				if (status == RF_RECON_WRITE_ERROR) {
+					raidPtr->reconControl->error = 1; 
+					/* an error was encountered at the very end... bail.
+					   This will be very bad news for the user, since
+					   at this point there will have been a read error
+					   on one component, and a write error on another!
+					*/
+					break;
+				}
+			}
+		}
+
+		
+		/* cleanup */
+
+		/* drain the event queue - after waiting for the writes above,
+		   there shouldn't be much (if anything!) left in the queue. */
+
+		rf_DrainReconEventQueue(reconDesc);
+		
+		/* XXX  As much as we'd like to free the recon control structure
+		   and the reconDesc, we have no way of knowing if/when those will
+		   be touched by IO that has yet to occur.  It is rather poor to be
+		   basically causing a 'memory leak' here, but there doesn't seem to be
+		   a cleaner alternative at this time.  Perhaps when the reconstruct code
+		   gets a makeover this problem will go away.
+		*/
+#if 0
+		rf_FreeReconControl(raidPtr);
+#endif
+
+#if RF_ACC_TRACE > 0
+		RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t));
+#endif
+		/* XXX see comment above */
+#if 0
+		FreeReconDesc(reconDesc);
+#endif
+
+		return (1);
 	}

 	/* Success:  mark the dead disk as reconstructed.  We quiesce
@ -683,7 +841,6 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
 	       (int) raidPtr->reconControl->starttime.tv_sec,
 	       (int) raidPtr->reconControl->starttime.tv_usec,
 	       (int) etime.tv_sec, (int) etime.tv_usec);
-	
 #if RF_RECON_STATS > 0
 	printf("raid%d: Total head-sep stall count was %d\n",
 	       raidPtr->raidid, (int) reconDesc->hsStallCount);
@ -695,11 +852,10 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
 	FreeReconDesc(reconDesc);
 	
 	return (0);
+
 }
 /*****************************************************************************
 * do the right thing upon each reconstruction event.
- * returns nonzero if and only if there is nothing left unread on the 
- * indicated disk
 *****************************************************************************/
 static int 
 ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
@ -708,6 +864,8 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 	RF_ReconBuffer_t *rbuf;
 	RF_SectorCount_t sectorsPerRU;

+	retcode = RF_RECON_READ_STOPPED;
+
 	Dprintf1("RECON: ProcessReconEvent type %d\n", event->type);
 	switch (event->type) {

@ -720,10 +878,12 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 		    rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
 		    rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
 		rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
-		submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
-		Dprintf1("RECON: submitblocked=%d\n", submitblocked);
-		if (!submitblocked)
-			retcode = IssueNextReadRequest(raidPtr, event->col);
+		if (!raidPtr->reconControl->error) {
+			submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
+			Dprintf1("RECON: submitblocked=%d\n", submitblocked);
+			if (!submitblocked)
+				retcode = IssueNextReadRequest(raidPtr, event->col);
+		}
 		break;

 		/* a write I/O has completed */
@ -742,6 +902,10 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 		    rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
 		rf_RemoveFromActiveReconTable(raidPtr, rbuf->parityStripeID, rbuf->which_ru);

+		RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
+		raidPtr->reconControl->pending_writes--;
+		RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
+
 		if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
 			RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
 			while(raidPtr->reconControl->rb_lock) {
@ -763,47 +927,59 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 				rf_FreeReconBuffer(rbuf);
 			else
 				RF_ASSERT(0);
+		retcode = 0;
 		break;

 	case RF_REVENT_BUFCLEAR:	/* A buffer-stall condition has been
 					 * cleared */
 		Dprintf1("RECON: BUFCLEAR EVENT: col %d\n", event->col);
-		submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl->perDiskInfo[event->col].rbuf, 0, (int) (long) event->arg);
-		RF_ASSERT(!submitblocked);	/* we wouldn't have gotten the
-						 * BUFCLEAR event if we
-						 * couldn't submit */
-		retcode = IssueNextReadRequest(raidPtr, event->col);
+		if (!raidPtr->reconControl->error) {
+			submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl->perDiskInfo[event->col].rbuf, 
+							     0, (int) (long) event->arg);
+			RF_ASSERT(!submitblocked);	/* we wouldn't have gotten the
+							 * BUFCLEAR event if we
+							 * couldn't submit */
+			retcode = IssueNextReadRequest(raidPtr, event->col);
+		}
 		break;

 	case RF_REVENT_BLOCKCLEAR:	/* A user-write reconstruction
 					 * blockage has been cleared */
 		DDprintf1("RECON: BLOCKCLEAR EVENT: col %d\n", event->col);
-		retcode = TryToRead(raidPtr, event->col);
+		if (!raidPtr->reconControl->error) {
+			retcode = TryToRead(raidPtr, event->col);
+		}
 		break;

 	case RF_REVENT_HEADSEPCLEAR:	/* A max-head-separation
 					 * reconstruction blockage has been
 					 * cleared */
 		Dprintf1("RECON: HEADSEPCLEAR EVENT: col %d\n", event->col);
-		retcode = TryToRead(raidPtr, event->col);
+		if (!raidPtr->reconControl->error) {
+			retcode = TryToRead(raidPtr, event->col);
+		}
 		break;

 		/* a buffer has become ready to write */
 	case RF_REVENT_BUFREADY:
 		Dprintf1("RECON: BUFREADY EVENT: col %d\n", event->col);
-		retcode = IssueNextWriteRequest(raidPtr);
+		if (!raidPtr->reconControl->error) {
+			retcode = IssueNextWriteRequest(raidPtr);
 #if RF_DEBUG_RECON
-		if (rf_floatingRbufDebug) {
-			rf_CheckFloatingRbufCount(raidPtr, 1);
-		}
+			if (rf_floatingRbufDebug) {
+				rf_CheckFloatingRbufCount(raidPtr, 1);
+			}
 #endif
+		}
 		break;

 		/* we need to skip the current RU entirely because it got
 		 * recon'd while we were waiting for something else to happen */
 	case RF_REVENT_SKIP:
 		DDprintf1("RECON: SKIP EVENT: col %d\n", event->col);
-		retcode = IssueNextReadRequest(raidPtr, event->col);
+		if (!raidPtr->reconControl->error) {		
+			retcode = IssueNextReadRequest(raidPtr, event->col);
+		}
 		break;

 		/* a forced-reconstruction read access has completed.  Just
@ -812,21 +988,38 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 		rbuf = (RF_ReconBuffer_t *) event->arg;
 		rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
 		DDprintf1("RECON: FORCEDREADDONE EVENT: col %d\n", event->col);
-		submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
-		RF_ASSERT(!submitblocked);
+		if (!raidPtr->reconControl->error) {
+			submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
+			RF_ASSERT(!submitblocked);
+		}
 		break;

 		/* A read I/O failed to complete */
 	case RF_REVENT_READ_FAILED:
-		/* fallthru to panic... */
+		retcode = RF_RECON_READ_ERROR;
+		break;

 		/* A write I/O failed to complete */
 	case RF_REVENT_WRITE_FAILED:
-		/* fallthru to panic... */
+		retcode = RF_RECON_WRITE_ERROR;
+
+		rbuf = (RF_ReconBuffer_t *) event->arg;
+
+		/* cleanup the disk queue data */
+		rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
+
+		/* At this point we're erroring out, badly, and floatingRbufs
+		   may not even be valid.  Rather than putting this back onto
+		   the floatingRbufs list, just arrange for its immediate
+		   destruction.
+		*/
+		rf_FreeReconBuffer(rbuf);
+		break;

 		/* a forced read I/O failed to complete */
 	case RF_REVENT_FORCEDREAD_FAILED:
-		/* fallthru to panic... */
+		retcode = RF_RECON_READ_ERROR;
+		break;

 	default:
 		RF_PANIC();
@ -851,9 +1044,6 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
 * accessed.  This allows us to easily retry when we're blocked by
 * head separation or reconstruction-blockage events.
 *
- * returns nonzero if and only if there is nothing left unread on the
- * indicated disk
- *
 *****************************************************************************/
 static int 
 IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t col)
@ -883,7 +1073,7 @@ IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t col)
 			 * parity stripe id */
 			if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) {
 				CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter));
-				return (1);	/* finito! */
+				return (RF_RECON_DONE_READS);	/* finito! */
 			}
 			/* find the disk offsets of the start of the parity
 			 * stripe on both the current disk and the failed
@ -1199,6 +1389,9 @@ IssueNextWriteRequest(RF_Raid_t *raidPtr)
 	RF_ASSERT(req);		/* XXX -- fix this -- XXX */

 	rbuf->arg = (void *) req;
+	RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
+	raidPtr->reconControl->pending_writes++;
+	RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
 	rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spCol], req, RF_IO_RECON_PRIORITY);

 	return (0);
@ -1216,7 +1409,16 @@ static int
 ReconReadDoneProc(void *arg, int status)
 {
 	RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
-	RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
+	RF_Raid_t *raidPtr;
+
+	/* Detect that reconCtrl is no longer valid, and if that
+	   is the case, bail without calling rf_CauseReconEvent().
+	   There won't be anyone listening for this event anyway */
+
+	if (ctrl->reconCtrl == NULL)
+		return(0);
+
+	raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;

 	if (status) {
 		printf("raid%d: Recon read failed!\n", raidPtr->raidid);
@ -1243,6 +1445,13 @@ ReconWriteDoneProc(void *arg, int status)
 {
 	RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;

+	/* Detect that reconControl is no longer valid, and if that
+	   is the case, bail without calling rf_CauseReconEvent().
+	   There won't be anyone listening for this event anyway */
+
+	if (rbuf->raidPtr->reconControl == NULL)
+		return(0);
+
 	Dprintf2("Reconstruction completed on psid %ld ru %d\n", rbuf->parityStripeID, rbuf->which_ru);
 	if (status) {
 		printf("raid%d: Recon write failed!\n", rbuf->raidPtr->raidid);
@ -1541,6 +1750,13 @@ ForceReconReadDoneProc(void *arg, int status)
 {
 	RF_ReconBuffer_t *rbuf = arg;

+	/* Detect that reconControl is no longer valid, and if that
+	   is the case, bail without calling rf_CauseReconEvent().
+	   There won't be anyone listening for this event anyway */
+
+	if (rbuf->raidPtr->reconControl == NULL)
+		return;
+
 	if (status) {
 		printf("raid%d: Forced recon read failed!\n", rbuf->raidPtr->raidid);
 		rf_CauseReconEvent(rbuf->raidPtr, rbuf->col, (void *) rbuf, RF_REVENT_FORCEDREAD_FAILED);
--- a/sys/dev/raidframe/rf_reconstruct.h
+++ b/sys/dev/raidframe/rf_reconstruct.h
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconstruct.h,v 1.18 2004/11/15 17:16:28 oster Exp $	*/
+/*	$NetBSD: rf_reconstruct.h,v 1.19 2005/02/05 23:32:44 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -129,6 +129,10 @@ struct RF_ReconCtrl_s {
 	int     percentComplete;/* percentage completion of reconstruction */
 	int     numRUsComplete; /* number of Reconstruction Units done */
 	int     numRUsTotal;    /* total number of Reconstruction Units */
+	int error;              /* non-0 indicates that an error has
+				   occurred during reconstruction, and
+				   the reconstruction is in the process of
+				   bailing out. */

 	/* reconstruction event queue */
 	RF_ReconEvent_t *eventQueue;	/* queue of pending reconstruction
@ -143,6 +147,8 @@ struct RF_ReconCtrl_s {
 	int rb_lock;                            /* 1 if someone is mucking
 						   with recon buffers,
 						   0 otherwise */
+	int pending_writes;			/* number of writes which
+						   have not completed */
 	RF_ReconBuffer_t *floatingRbufs;	/* available floating
 						 * reconstruction buffers */
 	RF_ReconBuffer_t *committedRbufs;	/* recon buffers that have
--- a/sys/dev/raidframe/rf_reconutil.c
+++ b/sys/dev/raidframe/rf_reconutil.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconutil.c,v 1.23 2004/03/18 16:54:54 oster Exp $	*/
+/*	$NetBSD: rf_reconutil.c,v 1.24 2005/02/05 23:32:44 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -31,7 +31,7 @@
 ********************************************/

 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconutil.c,v 1.23 2004/03/18 16:54:54 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconutil.c,v 1.24 2005/02/05 23:32:44 oster Exp $");

 #include <dev/raidframe/raidframevar.h>

@ -83,6 +83,8 @@ rf_MakeReconControl(RF_RaidReconDesc_t *reconDesc,
 	reconCtrlPtr->spareCol = scol;
 	reconCtrlPtr->lastPSID = layoutPtr->numStripe / layoutPtr->SUsPerPU;
 	reconCtrlPtr->percentComplete = 0;
+	reconCtrlPtr->error = 0;
+	reconCtrlPtr->pending_writes = 0;

 	/* initialize each per-disk recon information structure */
 	for (i = 0; i < raidPtr->numCol; i++) {
@ -161,12 +163,14 @@ rf_FreeReconControl(RF_Raid_t *raidPtr)
 	for (i = 0; i < raidPtr->numCol; i++)
 		if (reconCtrlPtr->perDiskInfo[i].rbuf)
 			rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf);
-	for (i = 0; i < raidPtr->numFloatingReconBufs; i++) {
-		t = reconCtrlPtr->floatingRbufs;
-		RF_ASSERT(t);
+
+	t = reconCtrlPtr->floatingRbufs;
+	while (t) {
 		reconCtrlPtr->floatingRbufs = t->next;
 		rf_FreeReconBuffer(t);
+		t = reconCtrlPtr->floatingRbufs;
 	}
+
 	rf_FreeReconMap(reconCtrlPtr->reconMap);
 	rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable);
 	RF_Free(reconCtrlPtr->perDiskInfo, 
--- a/sys/dev/raidframe/rf_revent.c
+++ b/sys/dev/raidframe/rf_revent.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_revent.c,v 1.19 2004/11/15 17:16:28 oster Exp $	*/
+/*	$NetBSD: rf_revent.c,v 1.20 2005/02/05 23:32:44 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -30,7 +30,7 @@
 */

 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.19 2004/11/15 17:16:28 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.20 2005/02/05 23:32:44 oster Exp $");

 #include <sys/errno.h>

@ -181,6 +181,31 @@ GetReconEventDesc(RF_RowCol_t col, void *arg, RF_Revent_t type)
 	return (t);
 }

+/*
+ * rf_DrainReconEventQueue() -- discard every event still pending on the
+ * reconstruction event queue of reconDesc's RAID set.  Used when a
+ * reconstruction is bailing out after an error; events are freed unprocessed.
+ */
+
+void
+rf_DrainReconEventQueue(RF_RaidReconDesc_t *reconDesc)
+{
+	RF_ReconCtrl_t *rctrl = reconDesc->raidPtr->reconControl;
+	RF_ReconEvent_t *event;
+
+	RF_LOCK_MUTEX(rctrl->eq_mutex);
+	while (rctrl->eventQueue!=NULL) {
+		
+		event = rctrl->eventQueue;
+		rctrl->eventQueue = event->next;
+		event->next = NULL;
+		rctrl->eq_count--;
+		/* free the unlinked event without processing it */
+		rf_FreeReconEventDesc(event);
+	}
+	RF_UNLOCK_MUTEX(rctrl->eq_mutex);
+}
+
 void 
 rf_FreeReconEventDesc(RF_ReconEvent_t *event)
 {
--- a/sys/dev/raidframe/rf_revent.h
+++ b/sys/dev/raidframe/rf_revent.h
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_revent.h,v 1.7 2004/11/15 17:16:28 oster Exp $	*/
+/*	$NetBSD: rf_revent.h,v 1.8 2005/02/05 23:32:44 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -43,7 +43,7 @@ RF_ReconEvent_t *rf_GetNextReconEvent(RF_RaidReconDesc_t * reconDesc);

 void rf_CauseReconEvent(RF_Raid_t * raidPtr, RF_RowCol_t col,
 			void *arg, RF_Revent_t type);
-
+void rf_DrainReconEventQueue(RF_RaidReconDesc_t *reconDesc);
 void rf_FreeReconEventDesc(RF_ReconEvent_t * event);

 #endif				/* !_RF__RF_REVENT_H_ */