Re-work some of the guts of the reconstruction code.
Reconmap used to have one pointer for every reconstruction unit. This does not scale well in the land of 1TB disks, where some 100MB+ of "status pointers" are required for typical configurations. Convert the reconstruction code to use a "sliding status window" which will scale nicely regardless of the number of stripes/reconstruction units in the RAID set. Convert the main reconstruction loop to rebuild the array in chunks rather than in one big lump. As part of these changes, introduce a function to kick any waiters on the head separation callback list, and use that in the main reconstruction event queue to wake up the waiters if things have stalled. (I believe this may fix a race condition that could occur at least at the very end of a disk during reconstruction under heavy IO load.) Thanks to Brian Buhrow for all his help, support, and patience in testing these changes.
This commit is contained in:
parent
aa10084492
commit
396f9f4598
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: rf_reconmap.c,v 1.30 2007/03/12 18:18:31 ad Exp $ */
|
||||
/* $NetBSD: rf_reconmap.c,v 1.31 2008/05/19 19:49:54 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
@ -34,7 +34,7 @@
|
||||
*************************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.30 2007/03/12 18:18:31 ad Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.31 2008/05/19 19:49:54 oster Exp $");
|
||||
|
||||
#include "rf_raid.h"
|
||||
#include <sys/time.h>
|
||||
@ -61,18 +61,13 @@ __KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.30 2007/03/12 18:18:31 ad Exp $");
|
||||
#define RF_NUM_RECON_POOL_ELEM 100
|
||||
|
||||
static void
|
||||
compact_stat_entry(RF_Raid_t * raidPtr, RF_ReconMap_t * mapPtr,
|
||||
int i);
|
||||
static void crunch_list(RF_ReconMap_t *mapPtr,
|
||||
RF_ReconMapListElem_t * listPtr);
|
||||
compact_stat_entry(RF_Raid_t *, RF_ReconMap_t *, int, int);
|
||||
static void crunch_list(RF_ReconMap_t *, RF_ReconMapListElem_t *);
|
||||
static RF_ReconMapListElem_t *
|
||||
MakeReconMapListElem(RF_ReconMap_t *mapPtr, RF_SectorNum_t startSector,
|
||||
RF_SectorNum_t stopSector, RF_ReconMapListElem_t * next);
|
||||
MakeReconMapListElem(RF_ReconMap_t *, RF_SectorNum_t, RF_SectorNum_t,
|
||||
RF_ReconMapListElem_t *);
|
||||
static void
|
||||
FreeReconMapListElem(RF_ReconMap_t *mapPtr, RF_ReconMapListElem_t * p);
|
||||
#if 0
|
||||
static void PrintList(RF_ReconMapListElem_t * listPtr);
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
*
|
||||
@ -99,13 +94,16 @@ rf_MakeReconMap(RF_Raid_t *raidPtr, RF_SectorCount_t ru_sectors,
|
||||
p->totalRUs = num_rus;
|
||||
p->spareRUs = spareUnitsPerDisk;
|
||||
p->unitsLeft = num_rus - spareUnitsPerDisk;
|
||||
p->low_ru = 0;
|
||||
p->status_size = RF_RECONMAP_SIZE;
|
||||
p->high_ru = p->status_size - 1;
|
||||
p->head = 0;
|
||||
|
||||
RF_Malloc(p->status, num_rus * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **));
|
||||
RF_Malloc(p->status, p->status_size * sizeof(RF_ReconMapListElem_t *), (RF_ReconMapListElem_t **));
|
||||
RF_ASSERT(p->status != (RF_ReconMapListElem_t **) NULL);
|
||||
|
||||
(void) memset((char *) p->status, 0,
|
||||
num_rus * sizeof(RF_ReconMapListElem_t *));
|
||||
|
||||
p->status_size * sizeof(RF_ReconMapListElem_t *));
|
||||
|
||||
pool_init(&p->elem_pool, sizeof(RF_ReconMapListElem_t), 0,
|
||||
0, 0, "raidreconpl", NULL, IPL_BIO);
|
||||
@ -138,12 +136,13 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
|
||||
RF_SectorNum_t startSector, RF_SectorNum_t stopSector)
|
||||
{
|
||||
RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
|
||||
RF_SectorNum_t i, first_in_RU, last_in_RU;
|
||||
RF_SectorNum_t i, first_in_RU, last_in_RU, ru;
|
||||
RF_ReconMapListElem_t *p, *pt;
|
||||
|
||||
RF_LOCK_MUTEX(mapPtr->mutex);
|
||||
while(mapPtr->lock) {
|
||||
ltsleep(&mapPtr->lock, PRIBIO, "reconupdate", 0, &mapPtr->mutex);
|
||||
ltsleep(&mapPtr->lock, PRIBIO, "reconupdate", 0,
|
||||
&mapPtr->mutex);
|
||||
}
|
||||
mapPtr->lock = 1;
|
||||
RF_UNLOCK_MUTEX(mapPtr->mutex);
|
||||
@ -154,12 +153,51 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
|
||||
i = startSector / mapPtr->sectorsPerReconUnit;
|
||||
first_in_RU = i * sectorsPerReconUnit;
|
||||
last_in_RU = first_in_RU + sectorsPerReconUnit - 1;
|
||||
p = mapPtr->status[i];
|
||||
|
||||
/* do we need to move the queue? */
|
||||
while (i > mapPtr->high_ru) {
|
||||
#ifdef DIAGNOSTIC
|
||||
if (mapPtr->status[mapPtr->head]!=RU_ALL) {
|
||||
printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64 "\n",
|
||||
raidPtr->raidid, i);
|
||||
printf("raid%d: ru %" PRIu64 " not completed!!!\n",
|
||||
raidPtr->raidid, mapPtr->head);
|
||||
|
||||
printf("raid%d: low: %" PRIu64 " high: %" PRIu64 "\n",
|
||||
raidPtr->raidid, mapPtr->low_ru, mapPtr->high_ru);
|
||||
|
||||
panic("reconmap incorrect");
|
||||
}
|
||||
#endif
|
||||
mapPtr->low_ru++;
|
||||
mapPtr->high_ru++;
|
||||
/* initialize "highest" RU status entry, which
|
||||
will take over the current head position */
|
||||
mapPtr->status[mapPtr->head]=RU_NOTHING;
|
||||
|
||||
/* move head too */
|
||||
mapPtr->head++;
|
||||
if (mapPtr->head >= mapPtr->status_size)
|
||||
mapPtr->head = 0;
|
||||
|
||||
}
|
||||
|
||||
ru = i - mapPtr->low_ru + mapPtr->head;
|
||||
if (ru >= mapPtr->status_size)
|
||||
ru = ru - mapPtr->status_size;
|
||||
|
||||
if ((ru < 0) || (ru >= mapPtr->status_size)) {
|
||||
printf("raid%d: ru is bogus %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n",
|
||||
raidPtr->raidid, i, ru, mapPtr->head, mapPtr->low_ru, mapPtr->high_ru);
|
||||
panic("bogus ru in reconmap");
|
||||
}
|
||||
|
||||
p = mapPtr->status[ru];
|
||||
if (p != RU_ALL) {
|
||||
if (p == RU_NOTHING || p->startSector > startSector) {
|
||||
/* insert at front of list */
|
||||
|
||||
mapPtr->status[i] = MakeReconMapListElem(mapPtr,startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p);
|
||||
mapPtr->status[ru] = MakeReconMapListElem(mapPtr,startSector, RF_MIN(stopSector, last_in_RU), (p == RU_NOTHING) ? NULL : p);
|
||||
|
||||
} else {/* general case */
|
||||
do { /* search for place to insert */
|
||||
@ -169,11 +207,11 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
|
||||
pt->next = MakeReconMapListElem(mapPtr,startSector, RF_MIN(stopSector, last_in_RU), p);
|
||||
|
||||
}
|
||||
compact_stat_entry(raidPtr, mapPtr, i);
|
||||
compact_stat_entry(raidPtr, mapPtr, i, ru);
|
||||
}
|
||||
startSector = RF_MIN(stopSector, last_in_RU) + 1;
|
||||
}
|
||||
RF_LOCK_MUTEX(mapPtr->mutex);
|
||||
RF_LOCK_MUTEX(mapPtr->mutex);
|
||||
mapPtr->lock = 0;
|
||||
wakeup(&mapPtr->lock);
|
||||
RF_UNLOCK_MUTEX(mapPtr->mutex);
|
||||
@ -200,22 +238,23 @@ rf_ReconMapUpdate(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr,
|
||||
*-------------------------------------------------------------------------*/
|
||||
|
||||
static void
|
||||
compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, int i)
|
||||
compact_stat_entry(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, int i, int j)
|
||||
{
|
||||
RF_SectorCount_t sectorsPerReconUnit = mapPtr->sectorsPerReconUnit;
|
||||
RF_ReconMapListElem_t *p = mapPtr->status[i];
|
||||
RF_ReconMapListElem_t *p = mapPtr->status[j];
|
||||
|
||||
crunch_list(mapPtr, p);
|
||||
|
||||
if ((p->startSector == i * sectorsPerReconUnit) &&
|
||||
(p->stopSector == i * sectorsPerReconUnit +
|
||||
sectorsPerReconUnit - 1)) {
|
||||
mapPtr->status[i] = RU_ALL;
|
||||
mapPtr->status[j] = RU_ALL;
|
||||
mapPtr->unitsLeft--;
|
||||
FreeReconMapListElem(mapPtr, p);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
crunch_list(RF_ReconMap_t *mapPtr, RF_ReconMapListElem_t *listPtr)
|
||||
{
|
||||
@ -283,7 +322,7 @@ rf_FreeReconMap(RF_ReconMap_t *mapPtr)
|
||||
if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
|
||||
numRUs++;
|
||||
|
||||
for (i = 0; i < numRUs; i++) {
|
||||
for (i = 0; i < mapPtr->status_size; i++) {
|
||||
p = mapPtr->status[i];
|
||||
while (p != RU_NOTHING && p != RU_ALL) {
|
||||
q = p;
|
||||
@ -291,8 +330,9 @@ rf_FreeReconMap(RF_ReconMap_t *mapPtr)
|
||||
RF_Free(q, sizeof(*q));
|
||||
}
|
||||
}
|
||||
|
||||
pool_destroy(&mapPtr->elem_pool);
|
||||
RF_Free(mapPtr->status, mapPtr->totalRUs *
|
||||
RF_Free(mapPtr->status, mapPtr->status_size *
|
||||
sizeof(RF_ReconMapListElem_t *));
|
||||
RF_Free(mapPtr, sizeof(RF_ReconMap_t));
|
||||
}
|
||||
@ -305,12 +345,26 @@ rf_FreeReconMap(RF_ReconMap_t *mapPtr)
|
||||
int
|
||||
rf_CheckRUReconstructed(RF_ReconMap_t *mapPtr, RF_SectorNum_t startSector)
|
||||
{
|
||||
RF_ReconMapListElem_t *l; /* used for searching */
|
||||
RF_ReconUnitNum_t i;
|
||||
int rv;
|
||||
|
||||
i = startSector / mapPtr->sectorsPerReconUnit;
|
||||
l = mapPtr->status[i];
|
||||
return ((l == RU_ALL) ? 1 : 0);
|
||||
|
||||
if (i < mapPtr->low_ru)
|
||||
rv = 1;
|
||||
else if (i > mapPtr->high_ru)
|
||||
rv = 0;
|
||||
else {
|
||||
i = i - mapPtr->low_ru + mapPtr->head;
|
||||
if (i >= mapPtr->status_size)
|
||||
i = i - mapPtr->status_size;
|
||||
if (mapPtr->status[i] == RU_ALL)
|
||||
rv = 1;
|
||||
else
|
||||
rv = 0;
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
RF_ReconUnitCount_t
|
||||
@ -320,44 +374,6 @@ rf_UnitsLeftToReconstruct(RF_ReconMap_t *mapPtr)
|
||||
return (mapPtr->unitsLeft);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void
|
||||
PrintList(RF_ReconMapListElem_t *listPtr)
|
||||
{
|
||||
while (listPtr) {
|
||||
printf("%d,%d -> ", (int) listPtr->startSector,
|
||||
(int) listPtr->stopSector);
|
||||
listPtr = listPtr->next;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void
|
||||
rf_PrintReconMap(RF_Raid_t *raidPtr, RF_ReconMap_t *mapPtr, RF_RowCol_t fcol)
|
||||
{
|
||||
RF_ReconUnitCount_t numRUs;
|
||||
RF_ReconMapListElem_t *p;
|
||||
RF_ReconUnitNum_t i;
|
||||
|
||||
numRUs = mapPtr->totalRUs;
|
||||
if (mapPtr->sectorsInDisk % mapPtr->sectorsPerReconUnit)
|
||||
numRUs++;
|
||||
|
||||
for (i = 0; i < numRUs; i++) {
|
||||
p = mapPtr->status[i];
|
||||
if (p == RU_ALL)/* printf("[%d] ALL\n",i) */
|
||||
;
|
||||
else
|
||||
if (p == RU_NOTHING) {
|
||||
printf("%d: Unreconstructed\n", i);
|
||||
} else {
|
||||
printf("%d: ", i);
|
||||
PrintList(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if RF_DEBUG_RECON
|
||||
void
|
||||
rf_PrintReconSchedule(RF_ReconMap_t *mapPtr, struct timeval *starttime)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: rf_reconmap.h,v 1.10 2005/12/11 12:23:37 christos Exp $ */
|
||||
/* $NetBSD: rf_reconmap.h,v 1.11 2008/05/19 19:49:54 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
@ -38,6 +38,9 @@
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
|
||||
/* the number of recon units in the status table. */
|
||||
#define RF_RECONMAP_SIZE 32
|
||||
|
||||
/*
|
||||
* Main reconstruction status descriptor.
|
||||
*/
|
||||
@ -49,6 +52,13 @@ struct RF_ReconMap_s {
|
||||
RF_ReconUnitCount_t totalRUs; /* total recon units on disk */
|
||||
RF_ReconUnitCount_t spareRUs; /* total number of spare RUs on failed
|
||||
* disk */
|
||||
RF_ReconUnitCount_t low_ru; /* lowest reconstruction unit number in
|
||||
the status array */
|
||||
RF_ReconUnitCount_t high_ru; /* highest reconstruction unit number
|
||||
in the status array */
|
||||
RF_ReconUnitCount_t head; /* the position in the array where
|
||||
low_ru is found */
|
||||
RF_ReconUnitCount_t status_size; /* number of recon units in status */
|
||||
RF_StripeCount_t totalParityStripes; /* total number of parity
|
||||
* stripes in array */
|
||||
RF_ReconMapListElem_t **status; /* array of ptrs to list elements */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: rf_reconstruct.c,v 1.103 2008/04/15 16:05:43 oster Exp $ */
|
||||
/* $NetBSD: rf_reconstruct.c,v 1.104 2008/05/19 19:49:54 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
@ -33,7 +33,7 @@
|
||||
************************************************************/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.103 2008/04/15 16:05:43 oster Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.104 2008/05/19 19:49:54 oster Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
@ -97,6 +97,7 @@ __KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.103 2008/04/15 16:05:43 oster E
|
||||
#define RF_RECON_READ_ERROR 2
|
||||
#define RF_RECON_WRITE_ERROR 3
|
||||
#define RF_RECON_READ_STOPPED 4
|
||||
#define RF_RECON_WRITE_DONE 5
|
||||
|
||||
#define RF_MAX_FREE_RECONBUFFER 32
|
||||
#define RF_MIN_FREE_RECONBUFFER 16
|
||||
@ -568,11 +569,12 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
|
||||
RF_ReconMap_t *mapPtr;
|
||||
RF_ReconCtrl_t *tmp_reconctrl;
|
||||
RF_ReconEvent_t *event;
|
||||
RF_CallbackDesc_t *p;
|
||||
RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev;
|
||||
RF_ReconUnitCount_t RUsPerPU;
|
||||
struct timeval etime, elpsd;
|
||||
unsigned long xor_s, xor_resid_us;
|
||||
int i, ds;
|
||||
int status;
|
||||
int status, done;
|
||||
int recon_error, write_error;
|
||||
|
||||
raidPtr->accumXorTimeUs = 0;
|
||||
@ -608,92 +610,139 @@ rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
|
||||
|
||||
RF_GETTIME(raidPtr->reconControl->starttime);
|
||||
|
||||
/* now start up the actual reconstruction: issue a read for
|
||||
* each surviving disk */
|
||||
|
||||
reconDesc->numDisksDone = 0;
|
||||
for (i = 0; i < raidPtr->numCol; i++) {
|
||||
if (i != col) {
|
||||
/* find and issue the next I/O on the
|
||||
* indicated disk */
|
||||
if (IssueNextReadRequest(raidPtr, i)) {
|
||||
Dprintf1("RECON: done issuing for c%d\n", i);
|
||||
reconDesc->numDisksDone++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Dprintf("RECON: resume requests\n");
|
||||
rf_ResumeNewRequests(raidPtr);
|
||||
|
||||
/* process reconstruction events until all disks report that
|
||||
* they've completed all work */
|
||||
|
||||
mapPtr = raidPtr->reconControl->reconMap;
|
||||
|
||||
incPSID = RF_RECONMAP_SIZE;
|
||||
lastPSID = raidPtr->Layout.numStripe / raidPtr->Layout.SUsPerPU;
|
||||
RUsPerPU = raidPtr->Layout.SUsPerPU / raidPtr->Layout.SUsPerRU;
|
||||
recon_error = 0;
|
||||
write_error = 0;
|
||||
pending_writes = incPSID;
|
||||
raidPtr->reconControl->lastPSID = incPSID;
|
||||
|
||||
while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
|
||||
/* start the actual reconstruction */
|
||||
|
||||
event = rf_GetNextReconEvent(reconDesc);
|
||||
status = ProcessReconEvent(raidPtr, event);
|
||||
|
||||
/* the normal case is that a read completes, and all is well. */
|
||||
if (status == RF_RECON_DONE_READS) {
|
||||
reconDesc->numDisksDone++;
|
||||
} else if ((status == RF_RECON_READ_ERROR) ||
|
||||
(status == RF_RECON_WRITE_ERROR)) {
|
||||
/* an error was encountered while reconstructing...
|
||||
Pretend we've finished this disk.
|
||||
*/
|
||||
recon_error = 1;
|
||||
raidPtr->reconControl->error = 1;
|
||||
|
||||
/* bump the numDisksDone count for reads,
|
||||
but not for writes */
|
||||
if (status == RF_RECON_READ_ERROR)
|
||||
reconDesc->numDisksDone++;
|
||||
|
||||
/* write errors are special -- when we are
|
||||
done dealing with the reads that are
|
||||
finished, we don't want to wait for any
|
||||
writes */
|
||||
if (status == RF_RECON_WRITE_ERROR)
|
||||
write_error = 1;
|
||||
|
||||
} else if (status == RF_RECON_READ_STOPPED) {
|
||||
/* count this component as being "done" */
|
||||
reconDesc->numDisksDone++;
|
||||
}
|
||||
|
||||
if (recon_error) {
|
||||
|
||||
/* make sure any stragglers are woken up so that
|
||||
their threads will complete, and we can get out
|
||||
of here with all IO processed */
|
||||
|
||||
while (raidPtr->reconControl->headSepCBList) {
|
||||
p = raidPtr->reconControl->headSepCBList;
|
||||
raidPtr->reconControl->headSepCBList = p->next;
|
||||
p->next = NULL;
|
||||
rf_CauseReconEvent(raidPtr, p->col, NULL, RF_REVENT_HEADSEPCLEAR);
|
||||
rf_FreeCallbackDesc(p);
|
||||
done = 0;
|
||||
while (!done) {
|
||||
|
||||
num_writes = 0;
|
||||
|
||||
/* issue a read for each surviving disk */
|
||||
|
||||
reconDesc->numDisksDone = 0;
|
||||
for (i = 0; i < raidPtr->numCol; i++) {
|
||||
if (i != col) {
|
||||
/* find and issue the next I/O on the
|
||||
* indicated disk */
|
||||
if (IssueNextReadRequest(raidPtr, i)) {
|
||||
Dprintf1("RECON: done issuing for c%d\n", i);
|
||||
reconDesc->numDisksDone++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
raidPtr->reconControl->numRUsTotal =
|
||||
mapPtr->totalRUs;
|
||||
raidPtr->reconControl->numRUsComplete =
|
||||
mapPtr->totalRUs -
|
||||
rf_UnitsLeftToReconstruct(mapPtr);
|
||||
/* process reconstruction events until all disks report that
|
||||
* they've completed all work */
|
||||
|
||||
while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
|
||||
|
||||
event = rf_GetNextReconEvent(reconDesc);
|
||||
status = ProcessReconEvent(raidPtr, event);
|
||||
|
||||
/* the normal case is that a read completes, and all is well. */
|
||||
if (status == RF_RECON_DONE_READS) {
|
||||
reconDesc->numDisksDone++;
|
||||
} else if ((status == RF_RECON_READ_ERROR) ||
|
||||
(status == RF_RECON_WRITE_ERROR)) {
|
||||
/* an error was encountered while reconstructing...
|
||||
Pretend we've finished this disk.
|
||||
*/
|
||||
recon_error = 1;
|
||||
raidPtr->reconControl->error = 1;
|
||||
|
||||
/* bump the numDisksDone count for reads,
|
||||
but not for writes */
|
||||
if (status == RF_RECON_READ_ERROR)
|
||||
reconDesc->numDisksDone++;
|
||||
|
||||
/* write errors are special -- when we are
|
||||
done dealing with the reads that are
|
||||
finished, we don't want to wait for any
|
||||
writes */
|
||||
if (status == RF_RECON_WRITE_ERROR)
|
||||
write_error = 1;
|
||||
|
||||
} else if (status == RF_RECON_READ_STOPPED) {
|
||||
/* count this component as being "done" */
|
||||
reconDesc->numDisksDone++;
|
||||
} else if (status == RF_RECON_WRITE_DONE) {
|
||||
num_writes++;
|
||||
}
|
||||
|
||||
if (recon_error) {
|
||||
/* make sure any stragglers are woken up so that
|
||||
their threads will complete, and we can get out
|
||||
of here with all IO processed */
|
||||
|
||||
rf_WakeupHeadSepCBWaiters(raidPtr);
|
||||
}
|
||||
|
||||
raidPtr->reconControl->numRUsTotal =
|
||||
mapPtr->totalRUs;
|
||||
raidPtr->reconControl->numRUsComplete =
|
||||
mapPtr->totalRUs -
|
||||
rf_UnitsLeftToReconstruct(mapPtr);
|
||||
|
||||
#if RF_DEBUG_RECON
|
||||
raidPtr->reconControl->percentComplete =
|
||||
(raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
|
||||
if (rf_prReconSched) {
|
||||
rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
|
||||
}
|
||||
raidPtr->reconControl->percentComplete =
|
||||
(raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
|
||||
if (rf_prReconSched) {
|
||||
rf_PrintReconSchedule(raidPtr->reconControl->reconMap, &(raidPtr->reconControl->starttime));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* reads done, wake up any waiters, and then wait for writes */
|
||||
|
||||
rf_WakeupHeadSepCBWaiters(raidPtr);
|
||||
|
||||
while (!recon_error && (num_writes < pending_writes)) {
|
||||
event = rf_GetNextReconEvent(reconDesc);
|
||||
status = ProcessReconEvent(raidPtr, event);
|
||||
|
||||
if (status == RF_RECON_WRITE_ERROR) {
|
||||
recon_error = 1;
|
||||
raidPtr->reconControl->error = 1;
|
||||
/* an error was encountered at the very end... bail */
|
||||
} else if (status == RF_RECON_WRITE_DONE) {
|
||||
num_writes++;
|
||||
}
|
||||
}
|
||||
if (recon_error ||
|
||||
(raidPtr->reconControl->lastPSID == lastPSID)) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
prev = raidPtr->reconControl->lastPSID;
|
||||
raidPtr->reconControl->lastPSID += incPSID;
|
||||
|
||||
if (raidPtr->reconControl->lastPSID > lastPSID) {
|
||||
pending_writes = lastPSID - prev;
|
||||
raidPtr->reconControl->lastPSID = lastPSID;
|
||||
}
|
||||
|
||||
/* back down curPSID to get ready for the next round... */
|
||||
for (i = 0; i < raidPtr->numCol; i++) {
|
||||
if (i != col) {
|
||||
raidPtr->reconControl->perDiskInfo[i].curPSID--;
|
||||
raidPtr->reconControl->perDiskInfo[i].ru_count = RUsPerPU - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mapPtr = raidPtr->reconControl->reconMap;
|
||||
@ -862,6 +911,7 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
|
||||
retcode = RF_RECON_READ_STOPPED;
|
||||
|
||||
Dprintf1("RECON: ProcessReconEvent type %d\n", event->type);
|
||||
|
||||
switch (event->type) {
|
||||
|
||||
/* a read I/O has completed */
|
||||
@ -924,7 +974,7 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF_ReconEvent_t *event)
|
||||
rf_FreeReconBuffer(rbuf);
|
||||
else
|
||||
RF_ASSERT(0);
|
||||
retcode = 0;
|
||||
retcode = RF_RECON_WRITE_DONE;
|
||||
break;
|
||||
|
||||
case RF_REVENT_BUFCLEAR: /* A buffer-stall condition has been
|
||||
@ -1813,3 +1863,32 @@ out:
|
||||
RF_UNLOCK_PSS_MUTEX(raidPtr, psid);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Drain the head separation callback list: for every descriptor queued on
 * reconControl->headSepCBList, post an RF_REVENT_HEADSEPCLEAR recon event
 * for that descriptor's column and free the descriptor.  The drain is
 * performed while this routine holds the reconControl->rb_lock flag.
 */
void
|
||||
rf_WakeupHeadSepCBWaiters(RF_Raid_t *raidPtr)
|
||||
{
|
||||
RF_CallbackDesc_t *p;
|
||||

||||
/* sleep on rb_lock (handing rb_mutex to ltsleep) for as long as the
   rb_lock flag is set */
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
|
||||
while(raidPtr->reconControl->rb_lock) {
|
||||
ltsleep(&raidPtr->reconControl->rb_lock, PRIBIO,
|
||||
"rf_wakeuphscbw", 0, &raidPtr->reconControl->rb_mutex);
|
||||
}
|
||||

||||
/* claim the flag, then release the mutex before walking the list */
raidPtr->reconControl->rb_lock = 1;
|
||||
RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
|
||||

||||
/* pop entries off the front of the list one at a time until it is empty */
while (raidPtr->reconControl->headSepCBList) {
|
||||
p = raidPtr->reconControl->headSepCBList;
|
||||
raidPtr->reconControl->headSepCBList = p->next;
|
||||
p->next = NULL;
|
||||
/* the entry is already unlinked when the event is posted and the
   descriptor freed */
rf_CauseReconEvent(raidPtr, p->col, NULL, RF_REVENT_HEADSEPCLEAR);
|
||||
rf_FreeCallbackDesc(p);
|
||||
}
|
||||
/* clear the rb_lock flag under rb_mutex and wake anything sleeping on it */
RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
|
||||
raidPtr->reconControl->rb_lock = 0;
|
||||
wakeup(&raidPtr->reconControl->rb_lock);
|
||||
RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
|
||||

||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: rf_reconstruct.h,v 1.23 2007/03/04 06:02:39 christos Exp $ */
|
||||
/* $NetBSD: rf_reconstruct.h,v 1.24 2008/05/19 19:49:55 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
@ -184,6 +184,7 @@ int rf_ForceOrBlockRecon(RF_Raid_t *, RF_AccessStripeMap_t *,
|
||||
void (*cbFunc) (RF_Raid_t *, void *),
|
||||
void *);
|
||||
int rf_UnblockRecon(RF_Raid_t *, RF_AccessStripeMap_t *);
|
||||
void rf_WakeupHeadSepCBWaiters(RF_Raid_t *);
|
||||
|
||||
extern struct pool rf_reconbuffer_pool;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: rf_revent.c,v 1.24 2006/11/16 01:33:23 christos Exp $ */
|
||||
/* $NetBSD: rf_revent.c,v 1.25 2008/05/19 19:49:55 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
@ -30,7 +30,7 @@
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.24 2006/11/16 01:33:23 christos Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.25 2008/05/19 19:49:55 oster Exp $");
|
||||
|
||||
#include <sys/errno.h>
|
||||
|
||||
@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: rf_revent.c,v 1.24 2006/11/16 01:33:23 christos Exp
|
||||
|
||||
#define RF_MAX_FREE_REVENT 128
|
||||
#define RF_MIN_FREE_REVENT 32
|
||||
#define RF_EVENTQ_WAIT 5000
|
||||
|
||||
#include <sys/proc.h>
|
||||
#include <sys/kernel.h>
|
||||
@ -78,6 +79,7 @@ rf_GetNextReconEvent(RF_RaidReconDesc_t *reconDesc)
|
||||
RF_Raid_t *raidPtr = reconDesc->raidPtr;
|
||||
RF_ReconCtrl_t *rctrl = raidPtr->reconControl;
|
||||
RF_ReconEvent_t *event;
|
||||
int stall_count;
|
||||
|
||||
RF_LOCK_MUTEX(rctrl->eq_mutex);
|
||||
/* q null and count==0 must be equivalent conditions */
|
||||
@ -119,14 +121,25 @@ rf_GetNextReconEvent(RF_RaidReconDesc_t *reconDesc)
|
||||
reconDesc->reconExecTicks = 0;
|
||||
}
|
||||
}
|
||||
|
||||
stall_count = 0;
|
||||
while (!rctrl->eventQueue) {
|
||||
#if RF_RECON_STATS > 0
|
||||
reconDesc->numReconEventWaits++;
|
||||
#endif /* RF_RECON_STATS > 0 */
|
||||
|
||||
ltsleep(&(rctrl)->eventQueue, PRIBIO, "raidframe eventq",
|
||||
0, &((rctrl)->eq_mutex));
|
||||
RF_EVENTQ_WAIT, &((rctrl)->eq_mutex));
|
||||
|
||||
stall_count++;
|
||||
|
||||
if ((stall_count > 10) &&
|
||||
rctrl->headSepCBList) {
|
||||
/* There is work to do on the callback list, and
|
||||
we've waited long enough... */
|
||||
rf_WakeupHeadSepCBWaiters(raidPtr);
|
||||
stall_count = 0;
|
||||
}
|
||||
reconDesc->reconExecTicks = 0; /* we've just waited */
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user