NetBSD/sys/dev/raidframe/rf_disks.c
mrg 1812d0a239 apply my patch to support non-512-byte sector disks (at least, up to 16KB
sector disks..)  from my tech-kern post:


the following patch lets me access both 512 byte and 4K
sector disks at the same time, as long as they are in
separate raids.  the existing rf code assumes/enforces
this part, i just made it support other sets concurrently.

the main change is moving the parity bitmap to the sector
after the component label sector(s), instead of being
immediately after the label, which meant it was on the same
sector as the label for >1024 byte devices.

i'm a little annoyed at having to add a 2nd call to
getdisksize() to enable auto-configure to work, but i
don't see another way that wasn't much uglier.
2010-12-04 10:01:16 +00:00

1125 lines
30 KiB
C

/* $NetBSD: rf_disks.c,v 1.76 2010/12/04 10:01:16 mrg Exp $ */
/*-
* Copyright (c) 1999 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Greg Oster
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/***************************************************************
* rf_disks.c -- code to perform operations on the actual disks
***************************************************************/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.76 2010/12/04 10:01:16 mrg Exp $");
#include <dev/raidframe/raidframevar.h>
#include "rf_raid.h"
#include "rf_alloclist.h"
#include "rf_utils.h"
#include "rf_general.h"
#include "rf_options.h"
#include "rf_kintf.h"
#include "rf_netbsd.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/namei.h> /* for pathbuf */
#include <sys/kauth.h>
static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
static void rf_print_label_status( RF_Raid_t *, int, char *,
RF_ComponentLabel_t *);
static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
RF_ComponentLabel_t *, int, int );
/*
 * Debug printf helpers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each macro is wrapped in do { } while (0) so that it expands
 * to exactly one statement and cannot capture an "else" that follows it.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
/**************************************************************************
 *
 * rf_ConfigureDisks -- initialize the disks comprising the array
 *
 * Configures every component named in cfgPtr->devnames[0][], fetches the
 * component label of each one that came up optimal, runs rf_CheckLabels()
 * and then derives the array geometry (sectorsPerDisk, bytesPerSector,
 * logBytesPerSector, sectorMask) from the live components.
 *
 * Returns 0 on success or an errno value.  On every failure path
 * rf_UnconfigureVnodes() is called on raidPtr before returning.  The
 * listp argument is not used by this function.
 *
 * We want the spare disks to have regular row,col numbers so that we can
 * easily substitute a spare for a failed disk. But, the driver code assumes
 * throughout that the array contains numRow by numCol _non-spare_ disks, so
 * it's not clear how to fit in the spares. This is an unfortunate holdover
 * from raidSim. The quick and dirty fix is to make row zero bigger than the
 * rest, and put all the spares in it. This probably needs to get changed
 * eventually.
 *
 **************************************************************************/
int
rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
	RF_RaidDisk_t *disks;
	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
	RF_RowCol_t c;
	int bs, ret;
	unsigned i, count, foundone = 0, numFailuresThisRow;
	int force;

	force = cfgPtr->force;

	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
	if (ret)
		goto fail;

	disks = raidPtr->Disks;

	numFailuresThisRow = 0;
	for (c = 0; c < raidPtr->numCol; c++) {
		ret = rf_ConfigureDisk(raidPtr,
		    &cfgPtr->devnames[0][c][0],
		    &disks[c], c);
		if (ret)
			goto fail;

		/* only components that opened cleanly have a label to read */
		if (disks[c].status == rf_ds_optimal) {
			ret = raidfetch_component_label(raidPtr, c);
			if (ret)
				goto fail;
		}

		if (disks[c].status != rf_ds_optimal) {
			numFailuresThisRow++;
		} else {
			/* track the smallest live component */
			if (disks[c].numBlocks < min_numblks) {
				min_numblks = disks[c].numBlocks;
			}
			/*
			 * Do the size arithmetic on the full-width block
			 * count and narrow only the final MB figure, so the
			 * value is not truncated before the multiply.
			 */
			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
			    c, disks[c].devname,
			    disks[c].numBlocks,
			    disks[c].blockSize,
			    (long int) (disks[c].numBlocks *
			    disks[c].blockSize / 1024 / 1024));
		}
	}

	/* XXX fix for n-fault tolerant */
	/* XXX this should probably check to see how many failures
	   we can handle for this configuration! */
	if (numFailuresThisRow > 0)
		raidPtr->status = rf_rs_degraded;

	/* all disks must be the same size & have the same block size, bs must
	 * be a power of 2 */
	bs = 0;
	foundone = 0;
	/* the first optimal component supplies the reference block size */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (disks[c].status == rf_ds_optimal) {
			bs = disks[c].blockSize;
			foundone = 1;
			break;
		}
	}
	if (!foundone) {
		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
		ret = EINVAL;
		goto fail;
	}

	/* count the set bits in bs: exactly one means a power of two */
	for (count = 0, i = 1; i; i <<= 1)
		if (bs & i)
			count++;
	if (count != 1) {
		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
		ret = EINVAL;
		goto fail;
	}

	if (rf_CheckLabels( raidPtr, cfgPtr )) {
		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
		if (force != 0) {
			printf("raid%d: Fatal errors being ignored.\n",
			    raidPtr->raidid);
		} else {
			ret = EINVAL;
			goto fail;
		}
	}

	for (c = 0; c < raidPtr->numCol; c++) {
		if (disks[c].status == rf_ds_optimal) {
			if (disks[c].blockSize != bs) {
				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
				ret = EINVAL;
				goto fail;
			}
			if (disks[c].numBlocks != min_numblks) {
				/* print the full 64-bit count, not an int */
				RF_ERRORMSG2("WARNING: truncating disk at c %d to %" PRIu64 " blocks\n",
				    c, (uint64_t) min_numblks);
				disks[c].numBlocks = min_numblks;
			}
		}
	}

	raidPtr->sectorsPerDisk = min_numblks;
	raidPtr->logBytesPerSector = ffs(bs) - 1;
	raidPtr->bytesPerSector = bs;
	raidPtr->sectorMask = bs - 1;
	return (0);

fail:
	rf_UnconfigureVnodes( raidPtr );

	return (ret);
}
/****************************************************************************
 * rf_ConfigureSpareDisks -- set up the data structures describing the
 * spare disks in the array.
 *
 * The spare descriptors live in raidPtr->Disks[] directly after the
 * numCol regular components; the space is expected to have been allocated
 * already by rf_ConfigureDisks().  Each name in cfgPtr->spare_names[] is
 * configured, then every spare is checked against the array's block size
 * and raidPtr->sectorsPerDisk (larger spares are truncated, smaller ones
 * are rejected).
 *
 * Returns 0 on success or an errno value; on failure
 * rf_UnconfigureVnodes() is called on raidPtr.  listp is not used here.
 ****************************************************************************/
int
rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
	int i, ret;
	unsigned int bs;
	RF_RaidDisk_t *disks;

	/* The space for the spares should have already been allocated by
	 * ConfigureDisks() */

	disks = &raidPtr->Disks[raidPtr->numCol];
	for (i = 0; i < raidPtr->numSpare; i++) {
		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
		    &disks[i], raidPtr->numCol + i);
		if (ret)
			goto fail;
		if (disks[i].status != rf_ds_optimal) {
			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
			    &cfgPtr->spare_names[i][0]);
		} else {
			disks[i].status = rf_ds_spare;	/* change status to
							 * spare */
			/* multiply at full width, narrow only the MB value */
			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
			    disks[i].devname,
			    disks[i].numBlocks, disks[i].blockSize,
			    (long int) (disks[i].numBlocks *
			    disks[i].blockSize / 1024 / 1024));
		}
	}

	/* check sizes and block sizes on spare disks */
	bs = 1 << raidPtr->logBytesPerSector;
	for (i = 0; i < raidPtr->numSpare; i++) {
		if (disks[i].blockSize != bs) {
			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
			ret = EINVAL;
			goto fail;
		}
		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
			/* report the spare's block count, not its block size */
			RF_ERRORMSG3("Spare disk %s (%" PRIu64 " blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
			    disks[i].devname, disks[i].numBlocks,
			    raidPtr->sectorsPerDisk);
			ret = EINVAL;
			goto fail;
		} else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
			    disks[i].devname,
			    raidPtr->sectorsPerDisk,
			    disks[i].numBlocks);

			disks[i].numBlocks = raidPtr->sectorsPerDisk;
		}
	}

	return (0);

fail:

	/* Release the hold on the main components.  We've failed to allocate
	 * a spare, and since we're failing, we need to free things..

	 XXX failing to allocate a spare is *not* that big of a deal...
	 We *can* survive without it, if need be, esp. if we get hot
	 adding working.

	 If we don't fail out here, then we need a way to remove this spare...
	 that should be easier to do here than if we are "live"...

	 */

	rf_UnconfigureVnodes( raidPtr );

	return (ret);
}
/*
 * Allocate the per-component arrays hanging off raidPtr: the Disks[]
 * descriptors and the raid_cinfo[] device-specific records.  Both get
 * numCol + RF_MAXSPARE entries so there is room to hot-swap spares, and
 * both are registered on raidPtr->cleanupList.
 *
 * Returns 0 on success.  If either allocation yields NULL,
 * rf_UnconfigureVnodes() is invoked and ENOMEM is returned.  cfgPtr is
 * not consulted.
 */
static int
rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
{
	int status = 0;

	/* component descriptors, including the slots reserved for spares */
	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
	    sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
	    raidPtr->cleanupList);

	if (raidPtr->Disks != NULL) {
		/* device specific records, same number of slots */
		RF_MallocAndAdd(raidPtr->raid_cinfo,
		    (raidPtr->numCol + RF_MAXSPARE) *
		    sizeof(struct raidcinfo), (struct raidcinfo *),
		    raidPtr->cleanupList);

		if (raidPtr->raid_cinfo == NULL) {
			status = ENOMEM;
		}
	} else {
		status = ENOMEM;
	}

	if (status != 0) {
		rf_UnconfigureVnodes( raidPtr );
	}
	return(status);
}
/*
 * rf_AutoConfigureDisks -- configure all components of a RAID set from an
 * auto-configuration list.
 *
 * auto_config is a linked list of candidate components, each carrying an
 * already opened vnode and its component label.  The highest mod_counter
 * on the list is taken as the current one.  For every column the entry
 * with that mod_counter is preferred; failing that, any entry for the
 * column is used but the component is marked failed.  Columns with no
 * entry at all are marked failed and named "component<col>".  Entries that
 * end up unused have their vnodes closed and released.
 *
 * On success the array geometry, serial number, mod_counter and parity
 * state are recorded in raidPtr and 0 is returned.  On failure
 * rf_UnconfigureVnodes() is called and an errno value is returned; a list
 * entry without a component label yields EINVAL.
 */
int
rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    RF_AutoConfig_t *auto_config)
{
	RF_RaidDisk_t *disks;
	RF_RaidDisk_t *diskPtr;
	RF_RowCol_t c;
	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
	int bs, ret;
	int numFailuresThisRow;
	RF_AutoConfig_t *ac;
	int parity_good;
	int mod_counter;
	int mod_counter_found;

#if DEBUG
	printf("Starting autoconfiguration of RAID set...\n");
#endif

	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
	if (ret)
		goto fail;

	disks = raidPtr->Disks;

	/* assume the parity will be fine.. */
	parity_good = RF_RAID_CLEAN;

	/* Check for mod_counters that are too low */
	mod_counter_found = 0;
	mod_counter = 0;
	for (ac = auto_config; ac != NULL; ac = ac->next) {
		if (ac->clabel == NULL) {
			/* no label to look at: refuse rather than
			   dereference a NULL pointer below */
			ret = EINVAL;
			goto fail;
		}
		if (mod_counter_found == 0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac->flag = 0; /* clear the general purpose flag */
	}

	bs = 0;

	numFailuresThisRow = 0;
	for (c = 0; c < raidPtr->numCol; c++) {
		diskPtr = &disks[c];

		/* find this row/col in the autoconfig */
#if DEBUG
		printf("Looking for %d in autoconfig\n",c);
#endif
		for (ac = auto_config; ac != NULL; ac = ac->next) {
			if (ac->clabel == NULL) {
				/* big-time bad news. */
				ret = EINVAL;
				goto fail;
			}
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
				/* flag it as 'used', so we don't
				   free it later. */
				ac->flag = 1;
#if DEBUG
				printf("Found: %s at %d\n",
				    ac->devname,c);
#endif
				break;
			}
		}

		if (ac == NULL) {
			/* we didn't find an exact match with a
			   correct mod_counter above... can we find
			   one with an incorrect mod_counter to use
			   instead?  (this one, if we find it, will be
			   marked as failed once the set configures)
			*/
			for (ac = auto_config; ac != NULL; ac = ac->next) {
				if (ac->clabel == NULL) {
					/* big-time bad news. */
					ret = EINVAL;
					goto fail;
				}
				if (ac->clabel->column == c) {
					/* it's this one...
					   flag it as 'used', so we
					   don't free it later. */
					ac->flag = 1;
#if DEBUG
					printf("Found(low mod_counter): %s at %d\n",
					    ac->devname,c);
#endif
					break;
				}
			}
		}

		if (ac != NULL) {
			/* Found it.  Configure it.. */
			diskPtr->blockSize = ac->clabel->blockSize;
			/* the block count is stored as two 32-bit halves */
			diskPtr->numBlocks = ac->clabel->numBlocks;
			diskPtr->numBlocks |=
			    (uint64_t)ac->clabel->numBlocksHi << 32;
			/* Note: rf_protectedSectors is already
			   factored into numBlocks here */
			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
			raidPtr->raid_cinfo[c].ci_dev = ac->dev;

			memcpy(raidget_component_label(raidPtr, c),
			    ac->clabel, sizeof(*ac->clabel));
			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
			    "/dev/%s", ac->devname);

			/* note the fact that this component was
			   autoconfigured.  You'll need this info
			   later.  Trust me :) */
			diskPtr->auto_configured = 1;
			diskPtr->dev = ac->dev;

			/*
			 * we allow the user to specify that
			 * only a fraction of the disks should
			 * be used this is just for debug: it
			 * speeds up the parity scan
			 */
			diskPtr->numBlocks = diskPtr->numBlocks *
			    rf_sizePercentage / 100;

			/* XXX these will get set multiple times,
			   but since we're autoconfiguring, they'd
			   better be always the same each time!
			   If not, this is the least of your worries */

			bs = diskPtr->blockSize;
			min_numblks = diskPtr->numBlocks;

			/* this gets done multiple times, but that's
			   fine -- the serial number will be the same
			   for all components, guaranteed */
			raidPtr->serial_number = ac->clabel->serial_number;

			/* check the last time the label was modified */
			if (ac->clabel->mod_counter != mod_counter) {
				/* Even though we've filled in all of
				   the above, we don't trust this
				   component since its modification
				   counter is not in sync with the
				   rest, and we really consider it to
				   be failed.  */
				disks[c].status = rf_ds_failed;
				numFailuresThisRow++;
			} else {
				if (ac->clabel->clean != RF_RAID_CLEAN) {
					parity_good = RF_RAID_DIRTY;
				}
			}
		} else {
			/* Didn't find it at all!!  Component must
			   really be dead */
			disks[c].status = rf_ds_failed;
			snprintf(disks[c].devname, sizeof(disks[c].devname),
			    "component%d", c);
			numFailuresThisRow++;
		}
	}

	/* XXX fix for n-fault tolerant */
	/* XXX this should probably check to see how many failures
	   we can handle for this configuration! */
	if (numFailuresThisRow > 0) {
		raidPtr->status = rf_rs_degraded;
		raidPtr->numFailures = numFailuresThisRow;
	}

	/* close the device for the ones that didn't get used */
	for (ac = auto_config; ac != NULL; ac = ac->next) {
		if (ac->flag == 0) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
#if DEBUG
			printf("Released %s from auto-config set.\n",
			    ac->devname);
#endif
		}
	}

	raidPtr->mod_counter = mod_counter;

	/* note the state of the parity, if any */
	raidPtr->parity_good = parity_good;
	raidPtr->sectorsPerDisk = min_numblks;
	raidPtr->logBytesPerSector = ffs(bs) - 1;
	raidPtr->bytesPerSector = bs;
	raidPtr->sectorMask = bs - 1;
	return (0);

fail:

	rf_UnconfigureVnodes( raidPtr );

	return (ret);
}
/*
 * rf_ConfigureDisk -- configure a single disk in the array.
 *
 * bf holds the component name; leading white space is skipped and one
 * trailing newline is stripped (in place) before the name is copied into
 * diskPtr->devname.  The name "absent" marks the column as failed without
 * touching any device.  Otherwise the device is looked up, its size is
 * read into diskPtr, and the vnode and device number are recorded in
 * raidPtr->raid_cinfo[col].
 *
 * Returns 0 on success, including the cases where the component is
 * "absent" or the lookup reports ENXIO (diskPtr->status is then
 * rf_ds_failed).  Any other error is returned as an errno value; the vnode
 * opened by the lookup is closed again on those paths.
 */
int
rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    RF_RowCol_t col)
{
	char *p;
	size_t len;
	struct pathbuf *pb;
	struct vnode *vp;
	struct vattr va;
	int error;

	p = rf_find_non_white(bf);
	len = strlen(p);
	/* guard against an empty name before looking at the last char */
	if (len > 0 && p[len - 1] == '\n') {
		/* strip off the newline */
		p[len - 1] = '\0';
	}
	/* bounded copy: never write past the end of devname */
	(void) snprintf(diskPtr->devname, sizeof(diskPtr->devname), "%s", p);

	/* Let's start by claiming the component is fine and well... */
	diskPtr->status = rf_ds_optimal;

	raidPtr->raid_cinfo[col].ci_vp = NULL;
	raidPtr->raid_cinfo[col].ci_dev = 0;

	if (!strcmp("absent", diskPtr->devname)) {
		printf("Ignoring missing component at column %d\n", col);
		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
		    "component%d", col);
		diskPtr->status = rf_ds_failed;
		return (0);
	}

	pb = pathbuf_create(diskPtr->devname);
	if (pb == NULL) {
		printf("pathbuf_create for device: %s failed!\n",
		    diskPtr->devname);
		return ENOMEM;
	}
	error = dk_lookup(pb, curlwp, &vp);
	pathbuf_destroy(pb);
	if (error) {
		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
		if (error == ENXIO) {
			/* the component isn't there... must be dead :-( */
			diskPtr->status = rf_ds_failed;
			return 0;
		} else {
			return (error);
		}
	}

	if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
		goto close_vp;

	/*
	 * If this raidPtr's bytesPerSector is zero, fill it in with this
	 * components blockSize.  This will give us something to work with
	 * initially, and if it is wrong, we'll get errors later.
	 */
	if (raidPtr->bytesPerSector == 0)
		raidPtr->bytesPerSector = diskPtr->blockSize;

	if (diskPtr->status == rf_ds_optimal) {
		if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
			goto close_vp;

		raidPtr->raid_cinfo[col].ci_vp = vp;
		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;

		/* This component was not automatically configured */
		diskPtr->auto_configured = 0;
		diskPtr->dev = va.va_rdev;

		/* we allow the user to specify that only a fraction of the
		 * disks should be used this is just for debug:  it speeds up
		 * the parity scan */
		diskPtr->numBlocks = diskPtr->numBlocks *
		    rf_sizePercentage / 100;
	}
	return (0);

close_vp:
	/*
	 * The vnode has not been recorded in raid_cinfo[] yet, so nothing
	 * else would ever close it; release it here.  The final 0 says the
	 * component was not auto-configured.
	 */
	rf_close_component(raidPtr, vp, 0);
	return (error);
}
/*
 * Print a human readable summary of one component label: the column the
 * component is being configured at, followed by the column, column count,
 * version, serial number, mod counter, clean flag and status stored in
 * the label itself.
 */
static void
rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    RF_ComponentLabel_t *ci_label)
{
	const char *clean_text;

	/* any non-zero clean value is reported as "Yes" */
	if (ci_label->clean) {
		clean_text = "Yes";
	} else {
		clean_text = "No";
	}

	printf("raid%d: Component %s being configured at col: %d\n",
	    raidPtr->raidid, dev_name, column );
	printf(" Column: %d Num Columns: %d\n",
	    ci_label->column, ci_label->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    ci_label->version,
	    ci_label->serial_number,
	    ci_label->mod_counter);
	printf(" Clean: %s Status: %d\n", clean_text, ci_label->status );
}
/*
 * Compare one component label against the values expected for its slot.
 *
 * A mismatch in serial number, row, column or number of columns is fatal.
 * A differing modification counter or a label that is not clean is only
 * reported.  Returns 1 if any fatal mismatch was seen, 0 otherwise.
 */
static int
rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    char *dev_name, RF_ComponentLabel_t *ci_label,
    int serial_number, int mod_counter)
{
	int bad = 0;

	if (ci_label->serial_number != serial_number) {
		printf("%s has a different serial number: %d %d\n",
		    dev_name, serial_number, ci_label->serial_number);
		bad = 1;
	}

	/* informational only: a stale mod counter is not fatal here */
	if (ci_label->mod_counter != mod_counter) {
		printf("%s has a different modification count: %d %d\n",
		    dev_name, mod_counter, ci_label->mod_counter);
	}

	if (ci_label->row != row) {
		printf("Row out of alignment for: %s\n", dev_name);
		bad = 1;
	}

	if (ci_label->column != column) {
		printf("Column out of alignment for: %s\n", dev_name);
		bad = 1;
	}

	if (ci_label->num_columns != raidPtr->numCol) {
		printf("Number of columns do not match for: %s\n", dev_name);
		bad = 1;
	}

	if (!ci_label->clean) {
		/* it's not clean, but that's not fatal */
		printf("%s is not clean!\n", dev_name);
	}

	return(bad);
}
/*
rf_CheckLabels() - check all the component labels for consistency.
Return an error if there is anything major amiss.

The labels of all raidPtr->numCol components are tallied by serial number
and by mod_counter.  If exactly two distinct values are seen and one of
them belongs to a single component, that component is treated as the
"hosed" one: unless cfgPtr->force is set it is marked rf_ds_failed and the
array is marked degraded.  The remaining components are then checked with
rf_check_label_vitals().

Side effects: raidPtr->serial_number, raidPtr->mod_counter and
raidPtr->parity_good are always written.

Returns 0 if no fatal problem was found, 1 otherwise.
*/
int
rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
{
int c;
char *dev_name;
RF_ComponentLabel_t *ci_label;
int serial_number = 0;
int mod_number = 0;
int fatal_error = 0;
/* distinct values seen so far, and how many components carry each */
int mod_values[4];
int mod_count[4];
int ser_values[4];
int ser_count[4];
int num_ser;
int num_mod;
int i;
int found;
/* column of the one component we decided to distrust, or -1 */
int hosed_column;
int too_fatal;
int parity_good;
int force;
hosed_column = -1;
too_fatal = 0;
force = cfgPtr->force;
/*
We're going to try to be a little intelligent here. If one
component's label is bogus, and we can identify that it's the
*only* one that's gone, we'll mark it as "failed" and allow
the configuration to proceed. This will be the *only* case
that we'll proceed if there would be (otherwise) fatal errors.
Basically we simply keep a count of how many components had
what serial number. If all but one agree, we simply mark
the disagreeing component as being failed, and allow
things to come up "normally".
We do this first for serial numbers, and then for "mod_counter".
*/
num_ser = 0;
num_mod = 0;
/* tally pass: stops early once a third distinct value shows up */
for (c = 0; c < raidPtr->numCol; c++) {
ci_label = raidget_component_label(raidPtr, c);
found=0;
for(i=0;i<num_ser;i++) {
if (ser_values[i] == ci_label->serial_number) {
ser_count[i]++;
found=1;
break;
}
}
if (!found) {
ser_values[num_ser] = ci_label->serial_number;
ser_count[num_ser] = 1;
num_ser++;
if (num_ser>2) {
fatal_error = 1;
break;
}
}
found=0;
for(i=0;i<num_mod;i++) {
if (mod_values[i] == ci_label->mod_counter) {
mod_count[i]++;
found=1;
break;
}
}
if (!found) {
mod_values[num_mod] = ci_label->mod_counter;
mod_count[num_mod] = 1;
num_mod++;
if (num_mod>2) {
fatal_error = 1;
break;
}
}
}
#if DEBUG
printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
for(i=0;i<num_ser;i++) {
printf("%d %d\n", ser_values[i], ser_count[i]);
}
printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
for(i=0;i<num_mod;i++) {
printf("%d %d\n", mod_values[i], mod_count[i]);
}
#endif
/* NOTE(review): ser_values[0] is only written when numCol > 0 */
serial_number = ser_values[0];
if (num_ser == 2) {
if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
/* Locate the maverick component */
if (ser_count[1] > ser_count[0]) {
serial_number = ser_values[1];
}
for (c = 0; c < raidPtr->numCol; c++) {
ci_label = raidget_component_label(raidPtr, c);
if (serial_number != ci_label->serial_number) {
hosed_column = c;
break;
}
}
printf("Hosed component: %s\n",
&cfgPtr->devnames[0][hosed_column][0]);
if (!force) {
/* we'll fail this component, as if there are
other major errors, we aren't forcing things
and we'll abort the config anyways */
raidPtr->Disks[hosed_column].status
= rf_ds_failed;
raidPtr->numFailures++;
raidPtr->status = rf_rs_degraded;
}
} else {
too_fatal = 1;
}
if (cfgPtr->parityConfig == '0') {
/* We've identified two different serial numbers.
RAID 0 can't cope with that, so we'll punt */
too_fatal = 1;
}
}
/* record the serial number for later. If we bail later, setting
this doesn't matter, otherwise we've got the best guess at the
correct serial number */
raidPtr->serial_number = serial_number;
mod_number = mod_values[0];
if (num_mod == 2) {
if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
/* Locate the maverick component */
if (mod_count[1] > mod_count[0]) {
mod_number = mod_values[1];
} else if (mod_count[1] < mod_count[0]) {
mod_number = mod_values[0];
} else {
/* counts of different modification values
are the same. Assume greater value is
the correct one, all other things
considered */
if (mod_values[0] > mod_values[1]) {
mod_number = mod_values[0];
} else {
mod_number = mod_values[1];
}
}
for (c = 0; c < raidPtr->numCol; c++) {
ci_label = raidget_component_label(raidPtr, c);
if (mod_number != ci_label->mod_counter) {
if (hosed_column == c) {
/* same one. Can
deal with it. */
} else {
/* a different component than the serial-number maverick */
hosed_column = c;
if (num_ser != 1) {
too_fatal = 1;
break;
}
}
}
}
printf("Hosed component: %s\n",
&cfgPtr->devnames[0][hosed_column][0]);
if (!force) {
/* we'll fail this component, as if there are
other major errors, we aren't forcing things
and we'll abort the config anyways */
/* don't count the same component as failed twice */
if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
raidPtr->Disks[hosed_column].status
= rf_ds_failed;
raidPtr->numFailures++;
raidPtr->status = rf_rs_degraded;
}
}
} else {
too_fatal = 1;
}
if (cfgPtr->parityConfig == '0') {
/* We've identified two different mod counters.
RAID 0 can't cope with that, so we'll punt */
too_fatal = 1;
}
}
raidPtr->mod_counter = mod_number;
if (too_fatal) {
/* we've had both a serial number mismatch, and a mod_counter
mismatch -- and they involved two different components!!
Bail -- make things fail so that the user must force
the issue... */
hosed_column = -1;
fatal_error = 1;
}
if (num_ser > 2) {
printf("raid%d: Too many different serial numbers!\n",
raidPtr->raidid);
fatal_error = 1;
}
if (num_mod > 2) {
printf("raid%d: Too many different mod counters!\n",
raidPtr->raidid);
fatal_error = 1;
}
/* we start by assuming the parity will be good, and flee from
that notion at the slightest sign of trouble */
parity_good = RF_RAID_CLEAN;
/* final pass: vet every component except the hosed one */
for (c = 0; c < raidPtr->numCol; c++) {
dev_name = &cfgPtr->devnames[0][c][0];
ci_label = raidget_component_label(raidPtr, c);
if (c == hosed_column) {
printf("raid%d: Ignoring %s\n",
raidPtr->raidid, dev_name);
} else {
rf_print_label_status( raidPtr, c, dev_name, ci_label);
if (rf_check_label_vitals( raidPtr, 0, c,
dev_name, ci_label,
serial_number,
mod_number )) {
fatal_error = 1;
}
if (ci_label->clean != RF_RAID_CLEAN) {
parity_good = RF_RAID_DIRTY;
}
}
}
if (fatal_error) {
parity_good = RF_RAID_DIRTY;
}
/* we note the state of the parity */
raidPtr->parity_good = parity_good;
return(fatal_error);
}
/*
 * rf_add_hot_spare -- add the component named in sparePtr as a new spare.
 *
 * Adds are serialised through raidPtr->adding_hot_spare, which is taken
 * and released under raidPtr->mutex.  The spare is configured into the
 * first free slot after the existing spares, checked against the array's
 * block size and raidPtr->sectorsPerDisk (a larger disk is truncated, a
 * smaller one is rejected) and given a disk queue.  raidPtr->numSpare is
 * incremented only when all of that succeeded.
 *
 * Returns 0 on success or an errno value; EINVAL is used when there is no
 * free spare slot or the component is unsuitable.
 */
int
rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
{
	RF_RaidDisk_t *disks;
	RF_DiskQueue_t *spareQueues;
	int ret;
	unsigned int bs;
	int spare_number;
	int spare_col;

	ret = 0;

	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->adding_hot_spare == 1) {
		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
		    &(raidPtr->mutex));
	}
	raidPtr->adding_hot_spare = 1;
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Check for a free slot only now that we are the sole adder: a
	 * caller that waited above must see the numSpare left behind by
	 * the add it waited for, or it could index past the spare slots.
	 */
	if (raidPtr->numSpare >= RF_MAXSPARE) {
		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
		ret = EINVAL;
		goto fail;
	}

	/* the beginning of the spares... */
	disks = &raidPtr->Disks[raidPtr->numCol];

	spare_number = raidPtr->numSpare;
	/* column index of the new spare within Disks[] / raid_cinfo[] */
	spare_col = raidPtr->numCol + spare_number;

	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
	    &disks[spare_number], spare_col);

	if (ret)
		goto fail;
	if (disks[spare_number].status != rf_ds_optimal) {
		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
		    sparePtr->component_name);
		rf_close_component(raidPtr,
		    raidPtr->raid_cinfo[spare_col].ci_vp, 0);
		ret = EINVAL;
		goto fail;
	} else {
		disks[spare_number].status = rf_ds_spare;
		/* multiply at full width, narrow only the MB value */
		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
		    spare_number,
		    disks[spare_number].devname,
		    disks[spare_number].numBlocks,
		    disks[spare_number].blockSize,
		    (long int) (disks[spare_number].numBlocks *
		    disks[spare_number].blockSize / 1024 / 1024));
	}

	/* check sizes and block sizes on the spare disk */
	bs = 1 << raidPtr->logBytesPerSector;
	if (disks[spare_number].blockSize != bs) {
		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
		rf_close_component(raidPtr,
		    raidPtr->raid_cinfo[spare_col].ci_vp, 0);
		ret = EINVAL;
		goto fail;
	}
	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
		/* report the spare's block count, not its block size */
		RF_ERRORMSG3("Spare disk %s (%" PRIu64 " blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
		    disks[spare_number].devname,
		    disks[spare_number].numBlocks,
		    raidPtr->sectorsPerDisk);
		rf_close_component(raidPtr,
		    raidPtr->raid_cinfo[spare_col].ci_vp, 0);
		ret = EINVAL;
		goto fail;
	} else {
		if (disks[spare_number].numBlocks >
		    raidPtr->sectorsPerDisk) {
			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
			    disks[spare_number].devname,
			    raidPtr->sectorsPerDisk,
			    disks[spare_number].numBlocks);

			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
		}
	}

	spareQueues = &raidPtr->Queues[raidPtr->numCol];
	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
	    spare_col,
	    raidPtr->qType,
	    raidPtr->sectorsPerDisk,
	    raidPtr->Disks[spare_col].dev,
	    raidPtr->maxOutstanding,
	    &raidPtr->shutdownList,
	    raidPtr->cleanupList);
	if (ret) {
		/* without a queue the spare is unusable: don't count it */
		rf_close_component(raidPtr,
		    raidPtr->raid_cinfo[spare_col].ci_vp, 0);
		goto fail;
	}

	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->numSpare++;
	RF_UNLOCK_MUTEX(raidPtr->mutex);

fail:
	/* success also falls through here to release the "adding" flag */
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->adding_hot_spare = 0;
	wakeup(&(raidPtr->adding_hot_spare));
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	return(ret);
}
/*
 * rf_remove_hot_spare -- remove a hot spare from the set.
 *
 * XXX not implemented yet: always returns EINVAL.  When there are no
 * spares at all a message is printed first.
 *
 * Outline for a future implementation (from the code that used to sit
 * here under "#if 0"):
 *   - reject a sparePtr->column that is negative or not below
 *     raidPtr->numSpare (the old sketch tested "> numSpare", which would
 *     have let the value numSpare itself through),
 *   - verify that this spare isn't in use,
 *   - decrement raidPtr->numSpare and return 0.
 */
int
rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
{
	if (raidPtr->numSpare == 0) {
		printf("No spares to remove!\n");
		return(EINVAL);
	}

	/* sparePtr is not examined until removal is actually implemented */
	(void) sparePtr;

	return(EINVAL); /* XXX not implemented yet */
}
/*
 * rf_delete_component -- delete a component from the set.
 *
 * Not implemented yet: always returns EINVAL.  The column in the request
 * is range-checked against raidPtr->numCol first, so the eventual
 * implementation can rely on it being a valid index.
 *
 * Planned steps:
 *   1. This component must be marked as 'failed'
 */
int
rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
{
	if ((component->column < 0) ||
	    (component->column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	return(EINVAL); /* Not implemented yet. */
}
/*
 * rf_incorporate_hot_spare -- placeholder; unconditionally returns EINVAL.
 *
 * Open issue noted by the original author: how to 'move' the spare in
 * while IO is taking place (e.g. component queues and such).
 */
int
rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
    RF_SingleComponent_t *component)
{
	/* Not implemented yet. */
	return EINVAL;
}