Be more aggressive about updating component labels in the event of a
real component failure (or a simulated failure):
- add 'numNewFailures' to keep track of the number of disk failures
since mod_counter was last updated for each component label.
- make sure we call rf_update_component_labels() upon any component failure,
real or simulated.
This commit is contained in:
oster 2000-02-23 03:44:02 +00:00
parent 6ac2d6c797
commit d91ecfbcfd
4 changed files with 19 additions and 4 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_driver.c,v 1.29 2000/02/23 02:04:21 oster Exp $ */
/* $NetBSD: rf_driver.c,v 1.30 2000/02/23 03:44:02 oster Exp $ */
/*-
* Copyright (c) 1999 The NetBSD Foundation, Inc.
* All rights reserved.
@ -516,6 +516,7 @@ rf_Configure(raidPtr, cfgPtr, ac)
}
}
raidPtr->numNewFailures = 0;
raidPtr->copyback_in_progress = 0;
raidPtr->parity_rewrite_in_progress = 0;
raidPtr->recon_in_progress = 0;
@ -756,6 +757,7 @@ rf_SetReconfiguredMode(raidPtr, row, col)
raidPtr->numFailures++;
raidPtr->Disks[row][col].status = rf_ds_dist_spared;
raidPtr->status[row] = rf_rs_reconfigured;
rf_update_component_labels(raidPtr);
/* install spare table only if declustering + distributed sparing
* architecture. */
if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
@ -779,6 +781,7 @@ rf_FailDisk(
raidPtr->numFailures++;
raidPtr->Disks[frow][fcol].status = rf_ds_failed;
raidPtr->status[frow] = rf_rs_degraded;
rf_update_component_labels(raidPtr);
RF_UNLOCK_MUTEX(raidPtr->mutex);
if (initRecon)
rf_ReconstructFailedDisk(raidPtr, frow, fcol);

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_netbsdkintf.c,v 1.55 2000/02/23 02:11:05 oster Exp $ */
/* $NetBSD: rf_netbsdkintf.c,v 1.56 2000/02/23 03:44:03 oster Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
* All rights reserved.
@ -1537,6 +1537,14 @@ raidstart(raidPtr)
unit = raidPtr->raidid;
rs = &raid_softc[unit];
/* quick check to see if anything has died recently */
RF_LOCK_MUTEX(raidPtr->mutex);
if (raidPtr->numNewFailures > 0) {
rf_update_component_labels(raidPtr);
raidPtr->numNewFailures--;
}
RF_UNLOCK_MUTEX(raidPtr->mutex);
/* Check to see if we're at the limit... */
RF_LOCK_MUTEX(raidPtr->mutex);
while (raidPtr->openings > 0) {
@ -1811,6 +1819,7 @@ KernelWakeupFunc(vbp)
rf_ds_failed;
queue->raidPtr->status[queue->row] = rf_rs_degraded;
queue->raidPtr->numFailures++;
queue->raidPtr->numNewFailures++;
/* XXX here we should bump the version number for each component, and write that data out */
} else { /* Disk is already dead... */
/* printf("Disk already marked as dead!\n"); */

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_raid.h,v 1.10 2000/02/23 02:04:21 oster Exp $ */
/* $NetBSD: rf_raid.h,v 1.11 2000/02/23 03:44:02 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -132,6 +132,8 @@ struct RF_Raid_s {
RF_LockTableEntry_t *lockTable; /* stripe-lock table */
RF_LockTableEntry_t *quiesceLock; /* quiescence table */
int numFailures; /* total number of failures in the array */
int numNewFailures; /* number of *new* failures (that haven't
caused a mod_counter update) */
int parity_good; /* !0 if parity is known to be correct */
int serial_number; /* a "serial number" for this set */

View File

@ -1,4 +1,4 @@
/* $NetBSD: rf_reconstruct.c,v 1.16 2000/02/23 02:03:03 oster Exp $ */
/* $NetBSD: rf_reconstruct.c,v 1.17 2000/02/23 03:44:03 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
@ -434,6 +434,7 @@ rf_ReconstructInPlace(raidPtr, row, col)
raidPtr->numFailures++;
raidPtr->Disks[row][col].status = rf_ds_failed;
raidPtr->status[row] = rf_rs_degraded;
rf_update_component_labels(raidPtr);
}
while (raidPtr->reconInProgress) {