38a3987b69
Carnegie Mellon University. Full RAID implementation, including levels 0, 1, 4, 5, 6, parity logging, and a few other goodies. Ported to NetBSD by Greg Oster.
887 lines
38 KiB
C
887 lines
38 KiB
C
/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
|
/*
|
|
* Copyright (c) 1995 Carnegie-Mellon University.
|
|
* All rights reserved.
|
|
*
|
|
* Author: ChangMing Wu
|
|
*
|
|
* Permission to use, copy, modify and distribute this software and
|
|
* its documentation is hereby granted, provided that both the copyright
|
|
* notice and this permission notice appear in all copies of the
|
|
* software, derivative works or modified versions, and any portions
|
|
* thereof, and that both notices appear in supporting documentation.
|
|
*
|
|
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
|
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
|
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
|
*
|
|
* Carnegie Mellon requests users of this software to return to
|
|
*
|
|
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
|
* School of Computer Science
|
|
* Carnegie Mellon University
|
|
* Pittsburgh PA 15213-3890
|
|
*
|
|
* any improvements or extensions that they make and grant Carnegie the
|
|
* rights to redistribute these changes.
|
|
*/
|
|
|
|
/*
|
|
* Code for RAID-EVENODD architecture.
|
|
*/
|
|
|
|
#include "rf_types.h"
|
|
#include "rf_raid.h"
|
|
#include "rf_dag.h"
|
|
#include "rf_dagffrd.h"
|
|
#include "rf_dagffwr.h"
|
|
#include "rf_dagdegrd.h"
|
|
#include "rf_dagdegwr.h"
|
|
#include "rf_dagutils.h"
|
|
#include "rf_dagfuncs.h"
|
|
#include "rf_threadid.h"
|
|
#include "rf_etimer.h"
|
|
#include "rf_general.h"
|
|
#include "rf_configure.h"
|
|
#include "rf_parityscan.h"
|
|
#include "rf_sys.h"
|
|
#include "rf_evenodd.h"
|
|
#include "rf_evenodd_dagfuncs.h"
|
|
|
|
/* These redundant functions are for small write */
|
|
RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" };
|
|
RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E" };
|
|
|
|
/* These redundant functions are for degraded read */
|
|
RF_RedFuncs_t rf_eoPRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
|
|
RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" };
|
|
|
|
/**********************************************************************************************
|
|
* The following encoding node function is used in EO_000_CreateLargeWriteDAG
|
|
**********************************************************************************************/
|
|
int rf_RegularPEFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
rf_RegularESubroutine(node,node->results[1]);
|
|
rf_RegularXorFunc(node); /* does the wakeup here! */
|
|
#if 1
|
|
return(0); /* XXX This was missing... GO */
|
|
#endif
|
|
}
|
|
|
|
|
|
/************************************************************************************************
|
|
* For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
|
|
* be used. The former is used when the write accesses at least a full stripe unit of sectors.
|
|
* The latter function is used when the write accesses two stripe units but with total sectors
|
|
* less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
|
|
* areas in their stripe unit and parity write and 'E' write are both divided into two distinct
|
|
* writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
|
|
************************************************************************************************/
|
|
|
|
/* Algorithm:
|
|
1. Store the difference of old data and new data in the Rod buffer.
|
|
2. then encode this buffer into the buffer which already have old 'E' information inside it,
|
|
the result can be shown to be the new 'E' information.
|
|
3. xor the Wnd buffer into the difference buffer to recover the original old data.
|
|
Here we have another alternative: to allocate a temporary buffer for storing the difference of
|
|
old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
|
|
take the same speed as the previous, and need more memory.
|
|
*/
|
|
int rf_RegularONEFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
|
int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */
|
|
int i, k, retcode = 0;
|
|
int suoffset, length;
|
|
RF_RowCol_t scol;
|
|
char *srcbuf, *destbuf;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
RF_Etimer_t timer;
|
|
RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
|
|
int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */
|
|
|
|
RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q );
|
|
RF_ASSERT(ESUOffset == 0);
|
|
|
|
RF_ETIMER_START(timer);
|
|
|
|
/* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */
|
|
for( k=0; k< EpdaIndex; k += 2) {
|
|
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
|
|
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
|
|
}
|
|
/* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer */
|
|
for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */
|
|
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
|
srcbuf = (char *) node->params[i+1].p;
|
|
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
|
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset);
|
|
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
|
}
|
|
/* Recover the original old data to be used by parity encoding function in XorNode */
|
|
for( k=0; k< EpdaIndex; k += 2) {
|
|
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
|
|
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
|
|
}
|
|
RF_ETIMER_STOP(timer);
|
|
RF_ETIMER_EVAL(timer);
|
|
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
|
rf_GenericWakeupFunc(node, 0);
|
|
#if 1
|
|
return(0); /* XXX this was missing.. GO */
|
|
#endif
|
|
}
|
|
|
|
int rf_SimpleONEFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
|
RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
|
|
int retcode = 0;
|
|
char *srcbuf, *destbuf;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
int length;
|
|
RF_RowCol_t scol;
|
|
RF_Etimer_t timer;
|
|
|
|
RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q );
|
|
if (node->dagHdr->status == rf_enable) {
|
|
RF_ETIMER_START(timer);
|
|
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[4].p)->numSector );/* this is a pda of writeDataNodes */
|
|
/* bxor to buffer of readDataNodes */
|
|
retcode = rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
|
|
/* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
|
|
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
srcbuf = node->params[1].p;
|
|
destbuf = node->params[3].p;
|
|
/* Start encoding process */
|
|
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
|
rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
|
|
RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
|
|
|
}
|
|
return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */
|
|
}
|
|
|
|
|
|
/****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/
|
|
void rf_RegularESubroutine(node, ebuf)
|
|
RF_DagNode_t *node;
|
|
char *ebuf;
|
|
{
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
|
RF_PhysDiskAddr_t *pda;
|
|
int i, suoffset;
|
|
RF_RowCol_t scol;
|
|
char *srcbuf, *destbuf;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
RF_Etimer_t timer;
|
|
|
|
RF_ETIMER_START(timer);
|
|
for (i=0; i<node->numParams-2; i+=2) {
|
|
RF_ASSERT( node->params[i+1].p != ebuf );
|
|
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
|
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
|
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
srcbuf = (char *) node->params[i+1].p;
|
|
destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset );
|
|
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
|
}
|
|
RF_ETIMER_STOP(timer);
|
|
RF_ETIMER_EVAL(timer);
|
|
tracerec->xor_us += RF_ETIMER_VAL_US(timer);
|
|
}
|
|
|
|
|
|
/*******************************************************************************************
|
|
* Used in EO_001_CreateLargeWriteDAG
|
|
******************************************************************************************/
|
|
int rf_RegularEFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
rf_RegularESubroutine(node, node->results[0]);
|
|
rf_GenericWakeupFunc(node, 0);
|
|
#if 1
|
|
return(0); /* XXX this was missing?.. GO */
|
|
#endif
|
|
}
|
|
|
|
/*******************************************************************************************
|
|
* This degraded function allow only two case:
|
|
* 1. when write access the full failed stripe unit, then the access can be more than
|
|
* one stripe unit.
|
|
* 2. when write access only part of the failed SU, we assume accesses of more than
|
|
* one stripe unit is not allowed so that the write can be dealt with like a
|
|
* large write.
|
|
* The following function is based on these assumptions. So except in the second case,
|
|
* it looks the same as a large write encoding function. But this is not exactly the
|
|
* normal way for doing a degraded write, since raidframe have to break cases of access
|
|
* other than the above two into smaller accesses. We may have to change
|
|
* DegrESubroutin in the future.
|
|
*******************************************************************************************/
|
|
void rf_DegrESubroutine(node, ebuf)
|
|
RF_DagNode_t *node;
|
|
char *ebuf;
|
|
{
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
|
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
|
|
RF_PhysDiskAddr_t *pda;
|
|
int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
|
|
RF_RowCol_t scol;
|
|
char *srcbuf, *destbuf;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
RF_Etimer_t timer;
|
|
|
|
RF_ETIMER_START(timer);
|
|
for (i=0; i<node->numParams-2; i+=2) {
|
|
RF_ASSERT( node->params[i+1].p != ebuf );
|
|
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
|
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
|
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
srcbuf = (char *) node->params[i+1].p;
|
|
destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset);
|
|
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
|
}
|
|
|
|
RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
|
}
|
|
|
|
|
|
/**************************************************************************************
|
|
* This function is used in case where one data disk failed and both redundant disks
|
|
* alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
|
|
* failed in the stripe but not accessed at this time, then we should, instead, use
|
|
* the rf_EOWriteDoubleRecoveryFunc().
|
|
**************************************************************************************/
|
|
int rf_Degraded_100_EOFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
rf_DegrESubroutine(node, node->results[1]);
|
|
rf_RecoveryXorFunc(node); /* does the wakeup here! */
|
|
#if 1
|
|
return(0); /* XXX this was missing... SHould these be void functions??? GO */
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************************************
|
|
* This function is to encode one sector in one of the data disks to the E disk.
|
|
* However, in evenodd this function can also be used as decoding function to recover
|
|
* data from dead disk in the case of parity failure and a single data failure.
|
|
**************************************************************************************/
|
|
void rf_e_EncOneSect(
|
|
RF_RowCol_t srcLogicCol,
|
|
char *srcSecbuf,
|
|
RF_RowCol_t destLogicCol,
|
|
char *destSecbuf,
|
|
int bytesPerSector)
|
|
{
|
|
int S_index; /* index of the EU in the src col which need be Xored into all EUs in a dest sector */
|
|
int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
|
|
RF_RowCol_t j, indexInDest, /* row index of an encoding unit in the destination colume of encoding matrix */
|
|
indexInSrc; /* row index of an encoding unit in the source colume used for recovery */
|
|
int bytesPerEU = bytesPerSector/numRowInEncMatix;
|
|
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
int shortsPerEU = bytesPerEU/sizeof(short);
|
|
short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
|
|
register short temp1;
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
int longsPerEU = bytesPerEU/sizeof(long);
|
|
long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
|
|
register long temp1;
|
|
#endif
|
|
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 );
|
|
RF_ASSERT( bytesPerEU % sizeof(short) == 0 );
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 );
|
|
RF_ASSERT( bytesPerEU % sizeof(long) == 0);
|
|
#endif
|
|
|
|
S_index = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
srcShortBuf1 = (short *)(srcSecbuf + S_index * bytesPerEU);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
srcLongBuf1 = (long *)(srcSecbuf + S_index * bytesPerEU);
|
|
#endif
|
|
|
|
for( indexInDest = 0; indexInDest < numRowInEncMatix ; indexInDest++){
|
|
indexInSrc = rf_EO_Mod( (indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM );
|
|
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
destShortBuf = (short *)(destSecbuf + indexInDest * bytesPerEU);
|
|
srcShortBuf2 = (short *)(srcSecbuf + indexInSrc * bytesPerEU);
|
|
for(j=0; j < shortsPerEU; j++) {
|
|
temp1 = destShortBuf[j]^srcShortBuf1[j];
|
|
/* note: S_index won't be at the end row for any src col! */
|
|
if(indexInSrc != RF_EO_MATRIX_DIM -1) destShortBuf[j] = (srcShortBuf2[j])^temp1;
|
|
/* if indexInSrc is at the end row, ie. RF_EO_MATRIX_DIM -1, then all elements are zero! */
|
|
else destShortBuf[j] = temp1;
|
|
}
|
|
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
destLongBuf = (long *)(destSecbuf + indexInDest * bytesPerEU);
|
|
srcLongBuf2 = (long *)(srcSecbuf + indexInSrc * bytesPerEU);
|
|
for(j=0; j < longsPerEU; j++) {
|
|
temp1 = destLongBuf[j]^srcLongBuf1[j];
|
|
if(indexInSrc != RF_EO_MATRIX_DIM -1) destLongBuf[j] = (srcLongBuf2[j])^temp1;
|
|
else destLongBuf[j] = temp1;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void rf_e_encToBuf(
|
|
RF_Raid_t *raidPtr,
|
|
RF_RowCol_t srcLogicCol,
|
|
char *srcbuf,
|
|
RF_RowCol_t destLogicCol,
|
|
char *destbuf,
|
|
int numSector)
|
|
{
|
|
int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
|
|
|
|
for (i=0; i < numSector; i++)
|
|
{
|
|
rf_e_EncOneSect( srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
|
|
srcbuf += bytesPerSector;
|
|
destbuf += bytesPerSector;
|
|
}
|
|
}
|
|
|
|
/**************************************************************************************
|
|
* when parity die and one data die, We use second redundant information, 'E',
|
|
* to recover the data in dead disk. This function is used in the recovery node of
|
|
* for EO_110_CreateReadDAG
|
|
**************************************************************************************/
|
|
int rf_RecoveryEFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
|
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
|
|
RF_RowCol_t scol, /*source logical column*/
|
|
fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */
|
|
int i;
|
|
RF_PhysDiskAddr_t *pda;
|
|
int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
|
|
char *srcbuf, *destbuf;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
RF_Etimer_t timer;
|
|
|
|
bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector));
|
|
if (node->dagHdr->status == rf_enable) {
|
|
RF_ETIMER_START(timer);
|
|
for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) {
|
|
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
|
if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */
|
|
else scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
srcbuf = (char *) node->params[i+1].p;
|
|
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
|
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
|
|
rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
|
|
}
|
|
RF_ETIMER_STOP(timer);
|
|
RF_ETIMER_EVAL(timer);
|
|
tracerec->xor_us += RF_ETIMER_VAL_US(timer);
|
|
}
|
|
return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
|
|
}
|
|
|
|
/**************************************************************************************
|
|
* This function is used in the case where one data and the parity have failed.
|
|
* (in EO_110_CreateWriteDAG )
|
|
**************************************************************************************/
|
|
/*
 * Degraded write: encode into the 'E' buffer held in node->results[0]
 * using rf_DegrESubroutine(), then wake the node with status 0.
 * Always returns 0.
 */
int
rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
{
	rf_DegrESubroutine(node, node->results[0]);

	/* rf_DegrESubroutine() does not wake the node, so do it here */
	rf_GenericWakeupFunc(node, 0);

	return (0);
}
|
|
|
|
|
|
|
|
/**************************************************************************************
|
|
* THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
|
|
**************************************************************************************/
|
|
|
|
void rf_doubleEOdecode(
|
|
RF_Raid_t *raidPtr,
|
|
char **rrdbuf,
|
|
char **dest,
|
|
RF_RowCol_t *fcol,
|
|
char *pbuf,
|
|
char *ebuf)
|
|
{
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
|
int i, j, k, f1, f2, row;
|
|
int rrdrow, erow, count = 0;
|
|
int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
|
int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
|
|
#if 0
|
|
int pcol = (RF_EO_MATRIX_DIM) - 1;
|
|
#endif
|
|
int ecol = (RF_EO_MATRIX_DIM) - 2;
|
|
int bytesPerEU = bytesPerSector/numRowInEncMatix;
|
|
int numDataCol = layoutPtr->numDataCol;
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
int shortsPerEU = bytesPerEU/sizeof(short);
|
|
short *rrdbuf_current, *pbuf_current, *ebuf_current;
|
|
short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
|
|
register short *temp;
|
|
short *P;
|
|
|
|
RF_ASSERT( bytesPerEU % sizeof(short) == 0);
|
|
RF_Malloc(P, bytesPerEU, (short *));
|
|
RF_Malloc(temp, bytesPerEU, (short *));
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
int longsPerEU = bytesPerEU/sizeof(long);
|
|
long *rrdbuf_current, *pbuf_current, *ebuf_current;
|
|
long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
|
|
register long *temp;
|
|
long *P;
|
|
|
|
RF_ASSERT( bytesPerEU % sizeof(long) == 0);
|
|
RF_Malloc(P, bytesPerEU, (long *));
|
|
RF_Malloc(temp, bytesPerEU, (long *));
|
|
#endif
|
|
RF_ASSERT( *((long *)dest[0]) == 0);
|
|
RF_ASSERT( *((long *)dest[1]) == 0);
|
|
bzero((char *)P, bytesPerEU);
|
|
bzero((char *)temp, bytesPerEU);
|
|
RF_ASSERT( *P == 0 );
|
|
/* calculate the 'P' parameter, which, not parity, is the Xor of all elements in
|
|
the last two column, ie. 'E' and 'parity' colume, see the Ref. paper by Blaum, et al 1993 */
|
|
for( i=0; i< numRowInEncMatix; i++)
|
|
for( k=0; k< longsPerEU; k++) {
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
ebuf_current = ((short *)ebuf) + i*shortsPerEU + k;
|
|
pbuf_current = ((short *)pbuf) + i*shortsPerEU + k;
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
ebuf_current = ((long *)ebuf) + i*longsPerEU + k;
|
|
pbuf_current = ((long *)pbuf) + i*longsPerEU + k;
|
|
#endif
|
|
P[k] ^= *ebuf_current;
|
|
P[k] ^= *pbuf_current;
|
|
}
|
|
RF_ASSERT( fcol[0] != fcol[1] );
|
|
if( fcol[0] < fcol[1] ) {
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
dest_smaller = (short *)(dest[0]);
|
|
dest_larger = (short *)(dest[1]);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
dest_smaller = (long *)(dest[0]);
|
|
dest_larger = (long *)(dest[1]);
|
|
#endif
|
|
f1 = fcol[0];
|
|
f2 = fcol[1];
|
|
}
|
|
else {
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
dest_smaller = (short *)(dest[1]);
|
|
dest_larger = (short *)(dest[0]);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
dest_smaller = (long *)(dest[1]);
|
|
dest_larger = (long *)(dest[0]);
|
|
#endif
|
|
f1 = fcol[1];
|
|
f2 = fcol[0];
|
|
}
|
|
row = (RF_EO_MATRIX_DIM) -1;
|
|
while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) )
|
|
{
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
dest_larger_current = dest_larger + row*shortsPerEU;
|
|
dest_smaller_current = dest_smaller + row*shortsPerEU;
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
dest_larger_current = dest_larger + row*longsPerEU;
|
|
dest_smaller_current = dest_smaller + row*longsPerEU;
|
|
#endif
|
|
/** Do the diagonal recovery. Initially, temp[k] = (failed 1),
|
|
which is the failed data in the colume which has smaller col index. **/
|
|
/* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
|
|
for( j=0; j< numDataCol; j++)
|
|
{
|
|
if( j == f1 || j == f2 ) continue;
|
|
rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM );
|
|
if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) {
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU;
|
|
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU;
|
|
for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
|
|
#endif
|
|
}
|
|
}
|
|
/* step 2: ^E(erow,m-2), If erow is at the buttom row, don't Xor into it
|
|
E(erow,m-2) = (principle diagonal) ^ (failed 1) ^ (failed 2)
|
|
^ ( SUM of nonfailed in-diagonal A(rrdrow,0..m-3) )
|
|
After this step, temp[k] = (principle diagonal) ^ (failed 2) */
|
|
|
|
erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) );
|
|
if ( erow != (RF_EO_MATRIX_DIM) -1) {
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
ebuf_current = (short *)ebuf + shortsPerEU * erow;
|
|
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
ebuf_current = (long *)ebuf + longsPerEU * erow;
|
|
for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k);
|
|
#endif
|
|
}
|
|
/* step 3: ^P to obtain the failed data (failed 2).
|
|
P can be proved to be actually (principle diagonal)
|
|
After this step, temp[k] = (failed 2), the failed data to be recovered */
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k];
|
|
/* Put the data to the destination buffer */
|
|
for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k];
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
for (k=0; k< longsPerEU; k++) temp[k] ^= P[k];
|
|
/* Put the data to the destination buffer */
|
|
for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k];
|
|
#endif
|
|
|
|
/** THE FOLLOWING DO THE HORIZONTAL XOR **/
|
|
/* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data columes */
|
|
for (j=0; j< numDataCol; j++)
|
|
{
|
|
if( j == f1 || j == f2 ) continue;
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU;
|
|
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU;
|
|
for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
|
|
#endif
|
|
}
|
|
/* step 2: ^A(row,m-1) */
|
|
/* step 3: Put the data to the destination buffer */
|
|
#if RF_EO_MATRIX_DIM > 17
|
|
pbuf_current = (short *)pbuf + shortsPerEU * row;
|
|
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k);
|
|
for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k];
|
|
#elif RF_EO_MATRIX_DIM == 17
|
|
pbuf_current = (long *)pbuf + longsPerEU * row;
|
|
for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k);
|
|
for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k];
|
|
#endif
|
|
count++;
|
|
}
|
|
/* Check if all Encoding Unit in the data buffer have been decoded,
|
|
according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
|
|
this algorithm will covered all buffer */
|
|
RF_ASSERT( count == numRowInEncMatix );
|
|
RF_Free((char *)P, bytesPerEU);
|
|
RF_Free((char *)temp, bytesPerEU);
|
|
}
|
|
|
|
|
|
/***************************************************************************************
|
|
* This function is called by double degraded read
|
|
* EO_200_CreateReadDAG
|
|
*
|
|
***************************************************************************************/
|
|
int rf_EvenOddDoubleRecoveryFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
int ndataParam = 0;
|
|
int np = node->numParams;
|
|
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
|
int i, prm, sector, nresults = node->numResults;
|
|
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
|
|
unsigned sosAddr;
|
|
int two = 0, mallc_one= 0, mallc_two = 0; /* flags to indicate if memory is allocated */
|
|
int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
|
RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda;
|
|
RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
|
|
char **buf, *ebuf, *pbuf, *dest[2];
|
|
long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff;
|
|
RF_SectorNum_t startSector, endSector;
|
|
RF_Etimer_t timer;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
|
|
RF_ETIMER_START(timer);
|
|
|
|
/* Find out the number of parameters which are pdas for data information */
|
|
for (i = 0; i<= np; i++)
|
|
if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; }
|
|
|
|
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
|
|
if (ndataParam != 0 ){
|
|
RF_Malloc(suoff, ndataParam*sizeof(long), (long *) );
|
|
RF_Malloc(suend, ndataParam*sizeof(long), (long *) );
|
|
RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) );
|
|
}
|
|
|
|
if (asmap->failedPDAs[1] &&
|
|
(asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
|
|
RF_ASSERT(0); /* currently, no support for this situation */
|
|
ppda = node->params[np-6].p;
|
|
ppda2 = node->params[np-5].p;
|
|
RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY );
|
|
epda = node->params[np-4].p;
|
|
epda2 = node->params[np-3].p;
|
|
RF_ASSERT( epda2->type == RF_PDA_TYPE_Q );
|
|
two = 1;
|
|
}
|
|
else {
|
|
ppda = node->params[np-4].p;
|
|
epda = node->params[np-3].p;
|
|
psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
|
|
esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
|
|
RF_ASSERT( psuoff == esuoff );
|
|
}
|
|
/*
|
|
the followings have three goals:
|
|
1. determine the startSector to begin decoding and endSector to end decoding.
|
|
2. determine the colume numbers of the two failed disks.
|
|
3. determine the offset and end offset of the access within each failed stripe unit.
|
|
*/
|
|
if( nresults == 1 ) {
|
|
/* find the startSector to begin decoding */
|
|
pda = node->results[0];
|
|
bzero(pda->bufPtr, bytesPerSector*pda->numSector );
|
|
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector );
|
|
fsuend[0] = fsuoff[0] + pda->numSector;
|
|
startSector = fsuoff[0];
|
|
endSector = fsuend[0];
|
|
|
|
/* find out the the column of failed disk being accessed */
|
|
fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
|
|
/* find out the other failed colume not accessed */
|
|
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
|
for (i=0; i < numDataCol; i++) {
|
|
npda.raidAddress = sosAddr + (i * secPerSU);
|
|
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
|
|
/* skip over dead disks */
|
|
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
|
|
if (i != fcol[0]) break;
|
|
}
|
|
RF_ASSERT (i < numDataCol);
|
|
fcol[1] = i;
|
|
}
|
|
else {
|
|
RF_ASSERT ( nresults == 2 );
|
|
pda0 = node->results[0]; bzero(pda0->bufPtr, bytesPerSector*pda0->numSector );
|
|
pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector*pda1->numSector );
|
|
/* determine the failed colume numbers of the two failed disks. */
|
|
fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress );
|
|
fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress );
|
|
/* determine the offset and end offset of the access within each failed stripe unit. */
|
|
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector );
|
|
fsuend[0] = fsuoff[0] + pda0->numSector;
|
|
fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector );
|
|
fsuend[1] = fsuoff[1] + pda1->numSector;
|
|
/* determine the startSector to begin decoding */
|
|
startSector = RF_MIN( pda0->startSector, pda1->startSector );
|
|
/* determine the endSector to end decoding */
|
|
endSector = RF_MAX( fsuend[0], fsuend[1] );
|
|
}
|
|
/*
|
|
assign the beginning sector and the end sector for each parameter
|
|
find out the corresponding colume # for each parameter
|
|
*/
|
|
for( prm=0; prm < ndataParam; prm++ ) {
|
|
pda = node->params[prm].p;
|
|
suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
|
suend[prm] = suoff[prm] + pda->numSector;
|
|
prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
}
|
|
/* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU,
|
|
find out the corresponding parameters that cover the current sector and that are needed for
|
|
decoding of this sector in failed SU. 2. Find out if sector is in the shadow of any accessed
|
|
failed SU. If not, malloc a temporary space of a sector in size.
|
|
*/
|
|
for( sector = startSector; sector < endSector; sector++ ){
|
|
if ( nresults == 2 )
|
|
if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue;
|
|
for( prm=0; prm < ndataParam; prm++ )
|
|
if( suoff[prm] <= sector && sector < suend[prm] )
|
|
buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr +
|
|
rf_RaidAddressToByte(raidPtr, sector-suoff[prm]);
|
|
/* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point
|
|
at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of
|
|
a sector in size for destination of decoding.
|
|
*/
|
|
RF_ASSERT( nresults == 1 || nresults == 2 );
|
|
if ( nresults == 1) {
|
|
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
|
|
/* Always malloc temp buffer to dest[1] */
|
|
RF_Malloc( dest[1], bytesPerSector, (char *) );
|
|
bzero(dest[1],bytesPerSector); mallc_two = 1; }
|
|
else {
|
|
if( fsuoff[0] <= sector && sector < fsuend[0] )
|
|
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
|
|
else { RF_Malloc( dest[0], bytesPerSector, (char *) );
|
|
bzero(dest[0],bytesPerSector); mallc_one = 1; }
|
|
if( fsuoff[1] <= sector && sector < fsuend[1] )
|
|
dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]);
|
|
else { RF_Malloc( dest[1], bytesPerSector, (char *) );
|
|
bzero(dest[1],bytesPerSector); mallc_two = 1; }
|
|
RF_ASSERT( mallc_one == 0 || mallc_two == 0 );
|
|
}
|
|
pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff );
|
|
ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff );
|
|
/*
|
|
* After finish finding all needed sectors, call doubleEOdecode function for decoding
|
|
* one sector to destination.
|
|
*/
|
|
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
|
|
/* free all allocated memory, and mark flag to indicate no memory is being allocated */
|
|
if( mallc_one == 1) RF_Free( dest[0], bytesPerSector );
|
|
if( mallc_two == 1) RF_Free( dest[1], bytesPerSector );
|
|
mallc_one = mallc_two = 0;
|
|
}
|
|
RF_Free(buf, numDataCol*sizeof(char *));
|
|
if (ndataParam != 0){
|
|
RF_Free(suoff, ndataParam*sizeof(long));
|
|
RF_Free(suend, ndataParam*sizeof(long));
|
|
RF_Free(prmToCol, ndataParam*sizeof(long));
|
|
}
|
|
|
|
RF_ETIMER_STOP(timer);
|
|
RF_ETIMER_EVAL(timer);
|
|
if (tracerec) {
|
|
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
|
}
|
|
rf_GenericWakeupFunc(node,0);
|
|
#if 1
|
|
return(0); /* XXX is this even close!!?!?!!? GO */
|
|
#endif
|
|
}
|
|
|
|
|
|
/* currently, only access of one of the two failed SU is allowed in this function.
|
|
* also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
|
|
* many accesses of single stripe unit.
|
|
*/
|
|
|
|
int rf_EOWriteDoubleRecoveryFunc(node)
|
|
RF_DagNode_t *node;
|
|
{
|
|
int np = node->numParams;
|
|
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
|
|
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
|
|
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
|
RF_SectorNum_t sector;
|
|
RF_RowCol_t col, scol;
|
|
int prm, i, j;
|
|
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
|
|
unsigned sosAddr;
|
|
unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
|
RF_int64 numbytes;
|
|
RF_SectorNum_t startSector, endSector;
|
|
RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda;
|
|
RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
|
|
char **buf; /* buf[0], buf[1], buf[2], ...etc. point to buffer storing data read from col0, col1, col2 */
|
|
char *ebuf, *pbuf, *dest[2], *olddata[2];
|
|
RF_Etimer_t timer;
|
|
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
|
|
|
RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */
|
|
RF_ETIMER_START(timer);
|
|
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
|
|
|
|
ppda = node->results[0]; /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda */
|
|
epda = node->results[1];
|
|
fpda = asmap->failedPDAs[0];
|
|
|
|
/* First, recovery the failed old SU using EvenOdd double decoding */
|
|
/* determine the startSector and endSector for decoding */
|
|
startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector );
|
|
endSector = startSector + fpda->numSector;
|
|
/* Assign buf[col] pointers to point to each non-failed colume and initialize the pbuf
|
|
and ebuf to point at the beginning of each source buffers and destination buffers */
|
|
for( prm=0; prm < numDataCol-2; prm++ ) {
|
|
pda = (RF_PhysDiskAddr_t *)node->params[prm].p;
|
|
col = rf_EUCol(layoutPtr, pda->raidAddress );
|
|
buf[col] = pda->bufPtr;
|
|
}
|
|
/* pbuf and ebuf: they will change values as double recovery decoding goes on */
|
|
pbuf = ppda->bufPtr;
|
|
ebuf = epda->bufPtr;
|
|
/* find out the logical colume numbers in the encoding matrix of the two failed columes */
|
|
fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress );
|
|
|
|
/* find out the other failed colume not accessed this time */
|
|
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
|
for (i=0; i < numDataCol; i++) {
|
|
npda.raidAddress = sosAddr + (i * secPerSU);
|
|
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
|
|
/* skip over dead disks */
|
|
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
|
|
if (i != fcol[0]) break;
|
|
}
|
|
RF_ASSERT (i < numDataCol);
|
|
fcol[1] = i;
|
|
/* assign temporary space to put recovered failed SU */
|
|
numbytes = fpda->numSector * bytesPerSector;
|
|
RF_Malloc(olddata[0], numbytes, (char *) );
|
|
RF_Malloc(olddata[1], numbytes, (char *) );
|
|
dest[0] = olddata[0];
|
|
dest[1] = olddata[1];
|
|
bzero(olddata[0], numbytes);
|
|
bzero(olddata[1], numbytes);
|
|
/* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] have already
|
|
pointed at the beginning of each source buffers and destination buffers */
|
|
for( sector = startSector, i=0; sector < endSector; sector++ , i++){
|
|
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
|
|
for (j=0; j < numDataCol; j++)
|
|
if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector;
|
|
dest[0] += bytesPerSector;
|
|
dest[1] += bytesPerSector;
|
|
ebuf += bytesPerSector;
|
|
pbuf += bytesPerSector;
|
|
}
|
|
/* after recovery, the buffer pointed by olddata[0] is the old failed data.
|
|
With new writing data and this old data, use small write to calculate
|
|
the new redundant informations
|
|
*/
|
|
/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd;
|
|
params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ;
|
|
params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1]
|
|
are Pdas of wudNodes;
|
|
For current implementation, we assume the simplest case:
|
|
asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1
|
|
then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data
|
|
into the old recovered data, then do the same things as small write.
|
|
*/
|
|
|
|
rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
|
|
/* do new 'E' calculation */
|
|
/* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
|
|
scol = rf_EUCol(layoutPtr, fpda->raidAddress );
|
|
/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer */
|
|
rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
|
|
|
|
/* do new 'P' calculation */
|
|
rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
|
|
/* Free the allocated buffer */
|
|
RF_Free( olddata[0], numbytes );
|
|
RF_Free( olddata[1], numbytes );
|
|
RF_Free( buf, numDataCol*sizeof(char *));
|
|
|
|
RF_ETIMER_STOP(timer);
|
|
RF_ETIMER_EVAL(timer);
|
|
if (tracerec) {
|
|
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
|
}
|
|
|
|
rf_GenericWakeupFunc(node,0);
|
|
return(0);
|
|
}
|