Be more aggressive about updating component labels in the event

of a real component failure (or a simulated failure):
- add 'numNewFailures' to keep track of the number of disk failures since mod_counter was last updated for each component label.
- make sure we call rf_update_component_labels() upon any component failure, real or simulated.
2000-02-23 03:44:02 +00:00 · 2000-02-23 03:44:02 +00:00 · d91ecfbcfd
commit d91ecfbcfd
parent 6ac2d6c797
4 changed files with 19 additions and 4 deletions
--- a/sys/dev/raidframe/rf_driver.c
+++ b/sys/dev/raidframe/rf_driver.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_driver.c,v 1.29 2000/02/23 02:04:21 oster Exp $	*/
+/*	$NetBSD: rf_driver.c,v 1.30 2000/02/23 03:44:02 oster Exp $	*/
 /*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
@ -516,6 +516,7 @@ rf_Configure(raidPtr, cfgPtr, ac)
 		}
 	}

+	raidPtr->numNewFailures = 0;
 	raidPtr->copyback_in_progress = 0;
 	raidPtr->parity_rewrite_in_progress = 0;
 	raidPtr->recon_in_progress = 0;
@ -756,6 +757,7 @@ rf_SetReconfiguredMode(raidPtr, row, col)
 	raidPtr->numFailures++;
 	raidPtr->Disks[row][col].status = rf_ds_dist_spared;
 	raidPtr->status[row] = rf_rs_reconfigured;
+	rf_update_component_labels(raidPtr);
 	/* install spare table only if declustering + distributed sparing
 	 * architecture. */
 	if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
@ -779,6 +781,7 @@ rf_FailDisk(
 	raidPtr->numFailures++;
 	raidPtr->Disks[frow][fcol].status = rf_ds_failed;
 	raidPtr->status[frow] = rf_rs_degraded;
+	rf_update_component_labels(raidPtr);
 	RF_UNLOCK_MUTEX(raidPtr->mutex);
 	if (initRecon)
 		rf_ReconstructFailedDisk(raidPtr, frow, fcol);
--- a/sys/dev/raidframe/rf_netbsdkintf.c
+++ b/sys/dev/raidframe/rf_netbsdkintf.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_netbsdkintf.c,v 1.55 2000/02/23 02:11:05 oster Exp $	*/
+/*	$NetBSD: rf_netbsdkintf.c,v 1.56 2000/02/23 03:44:03 oster Exp $	*/
 /*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
@ -1537,6 +1537,14 @@ raidstart(raidPtr)
 	unit = raidPtr->raidid;
 	rs = &raid_softc[unit];
 	
+	/* quick check to see if anything has died recently */
+	RF_LOCK_MUTEX(raidPtr->mutex);
+	if (raidPtr->numNewFailures > 0) {
+		rf_update_component_labels(raidPtr);
+		raidPtr->numNewFailures--;
+	}
+	RF_UNLOCK_MUTEX(raidPtr->mutex);
+
 	/* Check to see if we're at the limit... */
 	RF_LOCK_MUTEX(raidPtr->mutex);
 	while (raidPtr->openings > 0) {
@ -1811,6 +1819,7 @@ KernelWakeupFunc(vbp)
 			    rf_ds_failed;
 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
 			queue->raidPtr->numFailures++;
+			queue->raidPtr->numNewFailures++;
 			/* XXX here we should bump the version number for each component, and write that data out */
 		} else {	/* Disk is already dead... */
 			/* printf("Disk already marked as dead!\n"); */
--- a/sys/dev/raidframe/rf_raid.h
+++ b/sys/dev/raidframe/rf_raid.h
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_raid.h,v 1.10 2000/02/23 02:04:21 oster Exp $	*/
+/*	$NetBSD: rf_raid.h,v 1.11 2000/02/23 03:44:02 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -132,6 +132,8 @@ struct RF_Raid_s {
 	RF_LockTableEntry_t *lockTable;	/* stripe-lock table */
 	RF_LockTableEntry_t *quiesceLock;	/* quiesnce table */
 	int     numFailures;	/* total number of failures in the array */
+	int     numNewFailures; /* number of *new* failures (that haven't
+				   caused a mod_counter update) */

 	int     parity_good;    /* !0 if parity is known to be correct */
 	int     serial_number;  /* a "serial number" for this set */
--- a/sys/dev/raidframe/rf_reconstruct.c
+++ b/sys/dev/raidframe/rf_reconstruct.c
@ -1,4 +1,4 @@
-/*	$NetBSD: rf_reconstruct.c,v 1.16 2000/02/23 02:03:03 oster Exp $	*/
+/*	$NetBSD: rf_reconstruct.c,v 1.17 2000/02/23 03:44:03 oster Exp $	*/
 /*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
@ -434,6 +434,7 @@ rf_ReconstructInPlace(raidPtr, row, col)
 			raidPtr->numFailures++;
 			raidPtr->Disks[row][col].status = rf_ds_failed;
 			raidPtr->status[row] = rf_rs_degraded;
+			rf_update_component_labels(raidPtr);
 		}

 		while (raidPtr->reconInProgress) {