RAIDframe, version 1.1, from the Parallel Data Laboratory at
Carnegie Mellon University. Full RAID implementation, including levels 0, 1, 4, 5, 6, parity logging, and a few other goodies. Ported to NetBSD by Greg Oster.
This commit is contained in:
parent
2f3f9379cf
commit
38a3987b69
|
@ -0,0 +1,294 @@
|
|||
/* $NetBSD: rf_acctrace.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* acctrace.c -- code to support collecting information about each access
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_acctrace.c,v
|
||||
* Revision 1.29 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.28 1996/07/17 21:00:58 jimz
|
||||
* clean up timer interface, tracing
|
||||
*
|
||||
* Revision 1.27 1996/06/14 14:35:24 jimz
|
||||
* clean up dfstrace protection
|
||||
*
|
||||
* Revision 1.26 1996/06/13 19:09:04 jimz
|
||||
* remove trace.dat file before beginning
|
||||
*
|
||||
* Revision 1.25 1996/06/12 04:41:26 jimz
|
||||
* tweaks to make genplot work with user-level driver
|
||||
* (mainly change stat collection)
|
||||
*
|
||||
* Revision 1.24 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.23 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.22 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.21 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.20 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.19 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.18 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.17 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.16 1996/05/20 16:15:49 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.15 1996/05/18 20:10:00 jimz
|
||||
* bit of cleanup to compile cleanly in kernel, once again
|
||||
*
|
||||
* Revision 1.14 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.13 1995/11/30 16:26:43 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_types.h"
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <dfstrace.h>
|
||||
#endif /* !__NetBSD__ */
|
||||
#if DFSTRACE > 0
|
||||
#include <sys/dfs_log.h>
|
||||
#include <sys/dfstracebuf.h>
|
||||
#endif /* DFSTRACE > 0 */
|
||||
#endif /* KERNEL */
|
||||
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_hist.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
static long numTracesSoFar;
|
||||
static int accessTraceBufCount = 0;
|
||||
static RF_AccTraceEntry_t *access_tracebuf;
|
||||
static long traceCount;
|
||||
|
||||
int rf_stopCollectingTraces;
|
||||
RF_DECLARE_MUTEX(rf_tracing_mutex)
|
||||
int rf_trace_fd;
|
||||
|
||||
static void rf_ShutdownAccessTrace(void *);
|
||||
|
||||
static void rf_ShutdownAccessTrace(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
if (rf_accessTraceBufSize) {
|
||||
if (accessTraceBufCount) rf_FlushAccessTraceBuf();
|
||||
#ifndef KERNEL
|
||||
close(rf_trace_fd);
|
||||
#endif /* !KERNEL */
|
||||
RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
|
||||
}
|
||||
rf_mutex_destroy(&rf_tracing_mutex);
|
||||
#if defined(KERNEL) && DFSTRACE > 0
|
||||
printf("RAIDFRAME: %d trace entries were sent to dfstrace\n",traceCount);
|
||||
#endif /* KERNEL && DFSTRACE > 0 */
|
||||
}
|
||||
|
||||
int rf_ConfigureAccessTrace(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0;
|
||||
if (rf_accessTraceBufSize) {
|
||||
RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
|
||||
accessTraceBufCount = 0;
|
||||
#ifndef KERNEL
|
||||
rc = unlink("trace.dat");
|
||||
if (rc && (errno != ENOENT)) {
|
||||
perror("unlink");
|
||||
RF_ERRORMSG("Unable to remove existing trace.dat\n");
|
||||
return(errno);
|
||||
}
|
||||
if ((rf_trace_fd = open("trace.dat",O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0 ) {
|
||||
perror("Unable to open trace.dat for output");
|
||||
return(errno);
|
||||
}
|
||||
#endif /* !KERNEL */
|
||||
}
|
||||
traceCount = 0;
|
||||
numTracesSoFar = 0;
|
||||
rc = rf_mutex_init(&rf_tracing_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
}
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
if (rf_accessTraceBufSize) {
|
||||
RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
|
||||
#ifndef KERNEL
|
||||
close(rf_trace_fd);
|
||||
#endif /* !KERNEL */
|
||||
rf_mutex_destroy(&rf_tracing_mutex);
|
||||
}
|
||||
}
|
||||
return(rc);
|
||||
}
|
||||
|
||||
/* install a trace record. cause a flush to disk or to the trace collector daemon
|
||||
* if the trace buffer is at least 1/2 full.
|
||||
*/
|
||||
void rf_LogTraceRec(raid, rec)
|
||||
RF_Raid_t *raid;
|
||||
RF_AccTraceEntry_t *rec;
|
||||
{
|
||||
RF_AccTotals_t *acc = &raid->acc_totals;
|
||||
#if 0
|
||||
RF_Etimer_t timer;
|
||||
int i, n;
|
||||
#endif
|
||||
|
||||
if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces)))
|
||||
return;
|
||||
|
||||
#ifndef KERNEL
|
||||
if (rf_accessTraceBufSize) {
|
||||
RF_LOCK_MUTEX(rf_tracing_mutex);
|
||||
numTracesSoFar++;
|
||||
bcopy((char *)rec, (char *)&access_tracebuf[ accessTraceBufCount++ ], sizeof(RF_AccTraceEntry_t));
|
||||
if (accessTraceBufCount == rf_accessTraceBufSize)
|
||||
rf_FlushAccessTraceBuf();
|
||||
RF_UNLOCK_MUTEX(rf_tracing_mutex);
|
||||
}
|
||||
#endif /* !KERNEL */
|
||||
#if defined(KERNEL) && DFSTRACE > 0
|
||||
rec->index = traceCount++;
|
||||
if (traceon & DFS_TRACE_RAIDFRAME) {
|
||||
dfs_log(DFS_NOTE, (char *) rec, (int) sizeof(*rec), 0);
|
||||
}
|
||||
#endif /* KERNEL && DFSTRACE > 0 */
|
||||
/* update AccTotals for this device */
|
||||
if (!raid->keep_acc_totals)
|
||||
return;
|
||||
acc->num_log_ents++;
|
||||
if (rec->reconacc) {
|
||||
acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us;
|
||||
acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us;
|
||||
acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us;
|
||||
acc->recon_num_phys_ios += rec->num_phys_ios;
|
||||
acc->recon_phys_io_us += rec->phys_io_us;
|
||||
acc->recon_diskwait_us += rec->diskwait_us;
|
||||
acc->recon_reccount++;
|
||||
}
|
||||
else {
|
||||
RF_HIST_ADD(acc->tot_hist, rec->total_us);
|
||||
RF_HIST_ADD(acc->dw_hist, rec->diskwait_us);
|
||||
/* count of physical ios which are too big. often due to thermal recalibration */
|
||||
/* if bigvals > 0, you should probably ignore this data set */
|
||||
if (rec->diskwait_us > 100000)
|
||||
acc->bigvals++;
|
||||
acc->total_us += rec->total_us;
|
||||
acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us;
|
||||
acc->map_us += rec->specific.user.map_us;
|
||||
acc->lock_us += rec->specific.user.lock_us;
|
||||
acc->dag_create_us += rec->specific.user.dag_create_us;
|
||||
acc->dag_retry_us += rec->specific.user.dag_retry_us;
|
||||
acc->exec_us += rec->specific.user.exec_us;
|
||||
acc->cleanup_us += rec->specific.user.cleanup_us;
|
||||
acc->exec_engine_us += rec->specific.user.exec_engine_us;
|
||||
acc->xor_us += rec->xor_us;
|
||||
acc->q_us += rec->q_us;
|
||||
acc->plog_us += rec->plog_us;
|
||||
acc->diskqueue_us += rec->diskqueue_us;
|
||||
acc->diskwait_us += rec->diskwait_us;
|
||||
acc->num_phys_ios += rec->num_phys_ios;
|
||||
acc->phys_io_us = rec->phys_io_us;
|
||||
acc->user_reccount++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* assumes the tracing mutex is locked at entry. In order to allow this to be called
|
||||
* from interrupt context, we don't do any copyouts here, but rather just wake trace
|
||||
* buffer collector thread.
|
||||
*/
|
||||
void rf_FlushAccessTraceBuf()
|
||||
{
|
||||
#ifndef KERNEL
|
||||
int size = accessTraceBufCount * sizeof(RF_AccTraceEntry_t);
|
||||
|
||||
if (write(rf_trace_fd, (char *) access_tracebuf, size) < size ) {
|
||||
fprintf(stderr, "Unable to write traces to file. tracing disabled\n");
|
||||
RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
|
||||
rf_accessTraceBufSize = 0;
|
||||
close(rf_trace_fd);
|
||||
}
|
||||
#endif /* !KERNEL */
|
||||
accessTraceBufCount = 0;
|
||||
}
|
|
@ -0,0 +1,195 @@
|
|||
/* $NetBSD: rf_acctrace.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* acctrace.h -- header file for acctrace.c
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/* :
|
||||
*
|
||||
* Log: rf_acctrace.h,v
|
||||
* Revision 1.32 1996/08/02 15:12:38 jimz
|
||||
* remove dead code
|
||||
*
|
||||
* Revision 1.31 1996/07/27 14:34:39 jimz
|
||||
* remove bogus semicolon
|
||||
*
|
||||
* Revision 1.30 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.29 1996/07/17 21:00:58 jimz
|
||||
* clean up timer interface, tracing
|
||||
*
|
||||
* Revision 1.28 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.27 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
* /
|
||||
*
|
||||
* Revision 1.26 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.25 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.24 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.23 1996/05/28 12:34:30 jimz
|
||||
* nail down size of reconacc
|
||||
*
|
||||
* Revision 1.22 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.21 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.20 1996/05/02 14:57:24 jimz
|
||||
* change to boolean_t
|
||||
*
|
||||
* Revision 1.19 1995/12/14 18:37:06 jimz
|
||||
* convert to rf_types.h types
|
||||
*
|
||||
* Revision 1.18 1995/11/30 16:26:49 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.17 1995/09/30 19:49:23 jimz
|
||||
* add AccTotals structure, for capturing totals in kernel
|
||||
*
|
||||
* Revision 1.16 1995/09/12 00:20:55 wvcii
|
||||
* added support for tracing disk queue time
|
||||
*
|
||||
* Revision 1.15 95/09/06 19:23:12 wvcii
|
||||
* increased MAX_IOS_PER_TRACE_ENTRY from 1 to 4
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_ACCTRACE_H_
|
||||
#define _RF__RF_ACCTRACE_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_hist.h"
|
||||
#include "rf_etimer.h"
|
||||
|
||||
/*
 * Timing breakdown for one user (non-reconstruction) access.  Every field
 * is a duration in microseconds.  This is the "user" arm of the
 * `specific` union inside RF_AccTraceEntry_t.
 */
typedef struct RF_user_acc_stats_s {
|
||||
RF_uint64 suspend_ovhd_us; /* us spent mucking in the access-suspension code */
|
||||
RF_uint64 map_us; /* us spent mapping the access */
|
||||
RF_uint64 lock_us; /* us spent locking & unlocking stripes, including time spent blocked */
|
||||
RF_uint64 dag_create_us; /* us spent creating the DAGs */
|
||||
RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not broken down into components */
|
||||
RF_uint64 exec_us; /* us spent in DispatchDAG */
|
||||
RF_uint64 exec_engine_us; /* us spent in engine, not including blocking time */
|
||||
RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and generally cleaning up */
|
||||
} RF_user_acc_stats_t;
|
||||
|
||||
/*
 * Timing breakdown for one reconstruction access; the "recon" arm of the
 * `specific` union inside RF_AccTraceEntry_t.  Note these are 32-bit,
 * unlike the 64-bit user-access counters.
 * NOTE(review): the _us suffix suggests microseconds, and the names suggest
 * the three consecutive phases start->fetch, fetch->return, return->submit
 * of a reconstruction I/O -- confirm against the reconstruction code.
 */
typedef struct RF_recon_acc_stats_s {
|
||||
RF_uint32 recon_start_to_fetch_us;
|
||||
RF_uint32 recon_fetch_to_return_us;
|
||||
RF_uint32 recon_return_to_submit_us;
|
||||
} RF_recon_acc_stats_t;
|
||||
|
||||
/*
 * One trace record describing a single access.  `reconacc` selects which
 * arm of `specific` is meaningful: rf_LogTraceRec reads `specific.recon`
 * when it is non-zero and `specific.user` otherwise.
 */
typedef struct RF_acctrace_entry_s {
|
||||
union {
|
||||
RF_user_acc_stats_t user;
|
||||
RF_recon_acc_stats_t recon;
|
||||
} specific;
|
||||
RF_uint8 reconacc; /* whether this is a tracerec for a user acc or a recon acc */
|
||||
RF_uint64 xor_us; /* us spent doing XORs */
|
||||
RF_uint64 q_us; /* us spent on "Q" work, tracked separately from xor_us -- NOTE(review): original comment repeated the xor_us text; presumably the Q redundancy computation, confirm */
|
||||
RF_uint64 plog_us; /* us spent waiting to stuff parity into log */
|
||||
RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl concurrent ops */
|
||||
RF_uint64 diskwait_us; /* _total_ us spent actually waiting on the disk, incl concurrent ops */
|
||||
RF_uint64 total_us; /* total us spent on this access */
|
||||
RF_uint64 num_phys_ios; /* number of physical I/Os invoked */
|
||||
RF_uint64 phys_io_us; /* time of physical I/O */
|
||||
RF_Etimer_t tot_timer; /* a timer used to compute total access time */
|
||||
RF_Etimer_t timer; /* a generic timer val for timing events that live across procedure boundaries */
|
||||
RF_Etimer_t recon_timer; /* generic timer for recon stuff */
|
||||
RF_uint64 index; /* sequence number; rf_LogTraceRec sets it from traceCount in kernel DFSTRACE builds */
|
||||
} RF_AccTraceEntry_t;
|
||||
|
||||
/*
 * Running totals over all trace records logged for one array.
 * rf_LogTraceRec adds each record's fields into the matching field here
 * when the array's keep_acc_totals flag is set.
 */
typedef struct RF_AccTotals_s {
|
||||
/* user acc stats */
|
||||
RF_uint64 suspend_ovhd_us;
|
||||
RF_uint64 map_us;
|
||||
RF_uint64 lock_us;
|
||||
RF_uint64 dag_create_us;
|
||||
RF_uint64 dag_retry_us;
|
||||
RF_uint64 exec_us;
|
||||
RF_uint64 exec_engine_us;
|
||||
RF_uint64 cleanup_us;
|
||||
RF_uint64 user_reccount; /* number of user-access records folded in */
|
||||
/* recon acc stats */
|
||||
RF_uint64 recon_start_to_fetch_us;
|
||||
RF_uint64 recon_fetch_to_return_us;
|
||||
RF_uint64 recon_return_to_submit_us;
|
||||
RF_uint64 recon_io_overflow_count; /* NOTE(review): not updated by rf_LogTraceRec; meaning not visible here */
|
||||
RF_uint64 recon_phys_io_us;
|
||||
RF_uint64 recon_num_phys_ios;
|
||||
RF_uint64 recon_diskwait_us;
|
||||
RF_uint64 recon_reccount; /* number of recon-access records folded in */
|
||||
/* trace entry stats */
|
||||
RF_uint64 xor_us;
|
||||
RF_uint64 q_us;
|
||||
RF_uint64 plog_us;
|
||||
RF_uint64 diskqueue_us;
|
||||
RF_uint64 diskwait_us;
|
||||
RF_uint64 total_us;
|
||||
RF_uint64 num_log_ents; /* incremented once per record, user or recon */
|
||||
RF_uint64 phys_io_overflow_count; /* NOTE(review): not updated by rf_LogTraceRec; meaning not visible here */
|
||||
RF_uint64 num_phys_ios;
|
||||
RF_uint64 phys_io_us;
|
||||
RF_uint64 bigvals; /* user records whose diskwait_us exceeded 100000 */
|
||||
/* histograms */
|
||||
RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; /* fed with each user record's diskwait_us */
|
||||
RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; /* fed with each user record's total_us */
|
||||
} RF_AccTotals_t;
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex)
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
int rf_ConfigureAccessTrace(RF_ShutdownList_t **listp);
|
||||
void rf_LogTraceRec(RF_Raid_t *raid, RF_AccTraceEntry_t *rec);
|
||||
void rf_FlushAccessTraceBuf(void);
|
||||
|
||||
#endif /* !_RF__RF_ACCTRACE_H_ */
|
|
@ -0,0 +1,293 @@
|
|||
/* $NetBSD: rf_alloclist.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log: rf_alloclist.c,v
|
||||
* Revision 1.28 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.27 1996/06/12 03:29:54 jimz
|
||||
* don't barf just because we can't create an alloclist
|
||||
*
|
||||
* Revision 1.26 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.25 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.24 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.23 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.22 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.21 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.20 1996/05/20 16:15:59 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.19 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.18 1996/05/16 22:27:45 jimz
|
||||
* get rid of surreal_MakeAllocList (what was that, anyway?)
|
||||
*
|
||||
* Revision 1.17 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.16 1995/11/30 16:27:07 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.15 1995/10/05 20:37:56 jimz
|
||||
* assert non-NULLness of pointer to FREE in FreeAllocList()
|
||||
*
|
||||
* Revision 1.14 1995/06/11 20:11:24 holland
|
||||
* changed fl_hist,miss_count from long to int to get around weird kernel bug
|
||||
*
|
||||
* Revision 1.13 1995/05/01 13:28:00 holland
|
||||
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
|
||||
*
|
||||
* Revision 1.12 1995/04/21 19:13:04 holland
|
||||
* minor change to avoid a syntax error on DO_FREE
|
||||
*
|
||||
* Revision 1.11 1995/02/17 19:39:56 holland
|
||||
* added size param to all calls to Free().
|
||||
* this is ignored at user level, but necessary in the kernel.
|
||||
*
|
||||
* Revision 1.10 1995/02/10 18:08:07 holland
|
||||
* added DO_FREE macro to fix what I broke during kernelization
|
||||
*
|
||||
* Revision 1.9 1995/02/10 17:34:10 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.8 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.7 1995/02/01 15:13:05 holland
|
||||
* moved #include of general.h out of raid.h and into each file
|
||||
*
|
||||
* Revision 1.6 1995/01/11 19:27:02 holland
|
||||
* many changes related to performance tuning
|
||||
*
|
||||
* Revision 1.5 1994/11/29 20:53:10 danner
|
||||
* Marks mods
|
||||
*
|
||||
* Revision 1.3 1994/11/19 21:01:07 danner
|
||||
* First merge with mark
|
||||
*
|
||||
* Revision 1.1.1.1 1994/11/19 20:23:38 danner
|
||||
* First PQ checkin
|
||||
*
|
||||
* Revision 1.2 1994/11/16 15:45:35 danner
|
||||
* fixed free bug in FreeAllocList
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Alloclist.c -- code to manipulate allocation lists
|
||||
*
|
||||
* an allocation list is just a list of AllocListElem structures. Each
|
||||
* such structure contains a fixed-size array of pointers. Calling
|
||||
* rf_FreeAllocList() causes each pointer to be freed.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
RF_DECLARE_STATIC_MUTEX(alist_mutex)
|
||||
static unsigned int fl_hit_count, fl_miss_count;
|
||||
|
||||
static RF_AllocListElem_t *al_free_list=NULL;
|
||||
static int al_free_list_count;
|
||||
|
||||
#define RF_AL_FREELIST_MAX 256
|
||||
|
||||
#ifndef KERNEL
|
||||
#define DO_FREE(_p,_sz) free((_p))
|
||||
#else /* !KERNEL */
|
||||
#define DO_FREE(_p,_sz) RF_Free((_p),(_sz))
|
||||
#endif /* !KERNEL */
|
||||
|
||||
static void rf_ShutdownAllocList(void *);
|
||||
|
||||
static void rf_ShutdownAllocList(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_AllocListElem_t *p, *pt;
|
||||
|
||||
for (p = al_free_list; p; ) {
|
||||
pt = p;
|
||||
p = p->next;
|
||||
DO_FREE(pt, sizeof(*pt));
|
||||
}
|
||||
rf_mutex_destroy(&alist_mutex);
|
||||
/*
|
||||
printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n",
|
||||
fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count),
|
||||
fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count));
|
||||
*/
|
||||
}
|
||||
|
||||
int rf_ConfigureAllocList(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = rf_mutex_init(&alist_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
al_free_list = NULL;
|
||||
fl_hit_count = fl_miss_count = al_free_list_count = 0;
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
|
||||
__FILE__, __LINE__, rc);
|
||||
rf_mutex_destroy(&alist_mutex);
|
||||
return(rc);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
/* we expect the lists to have at most one or two elements, so we're willing
|
||||
* to search for the end. If you ever observe the lists growing longer,
|
||||
* increase POINTERS_PER_ALLOC_LIST_ELEMENT.
|
||||
*/
|
||||
void rf_real_AddToAllocList(l, p, size, lockflag)
|
||||
RF_AllocListElem_t *l;
|
||||
void *p;
|
||||
int size;
|
||||
int lockflag;
|
||||
{
|
||||
RF_AllocListElem_t *newelem;
|
||||
|
||||
for ( ; l->next; l=l->next)
|
||||
RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT); /* find end of list */
|
||||
|
||||
RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
|
||||
if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) {
|
||||
newelem = rf_real_MakeAllocList(lockflag);
|
||||
l->next = newelem;
|
||||
l = newelem;
|
||||
}
|
||||
l->pointers[ l->numPointers ] = p;
|
||||
l->sizes [ l->numPointers ] = size;
|
||||
l->numPointers++;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* we use the debug_mem_mutex here because we need to lock it anyway to call free.
|
||||
* this is probably a bug somewhere else in the code, but when I call malloc/free
|
||||
* outside of any lock I have endless trouble with malloc appearing to return the
|
||||
* same pointer twice. Since we have to lock it anyway, we might as well use it
|
||||
* as the lock around the al_free_list. Note that we can't call Free with the
|
||||
* debug_mem_mutex locked.
|
||||
*/
|
||||
void rf_FreeAllocList(l)
|
||||
RF_AllocListElem_t *l;
|
||||
{
|
||||
int i;
|
||||
RF_AllocListElem_t *temp, *p;
|
||||
|
||||
for (p=l; p; p=p->next) {
|
||||
RF_ASSERT(p->numPointers >= 0 && p->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
|
||||
for (i=0; i<p->numPointers; i++) {
|
||||
RF_ASSERT(p->pointers[i]);
|
||||
RF_Free(p->pointers[i], p->sizes[i]);
|
||||
}
|
||||
}
|
||||
#ifndef KERNEL
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
#endif /* !KERNEL */
|
||||
while (l) {
|
||||
temp = l;
|
||||
l = l->next;
|
||||
if (al_free_list_count > RF_AL_FREELIST_MAX) {DO_FREE(temp, sizeof(*temp));}
|
||||
else {temp->next = al_free_list; al_free_list = temp; al_free_list_count++;}
|
||||
}
|
||||
#ifndef KERNEL
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
#endif /* !KERNEL */
|
||||
}
|
||||
|
||||
RF_AllocListElem_t *rf_real_MakeAllocList(lockflag)
|
||||
int lockflag;
|
||||
{
|
||||
RF_AllocListElem_t *p;
|
||||
|
||||
#ifndef KERNEL
|
||||
if (lockflag) { RF_LOCK_MUTEX(rf_debug_mem_mutex); }
|
||||
#endif /* !KERNEL */
|
||||
if (al_free_list) {fl_hit_count++; p = al_free_list; al_free_list = p->next; al_free_list_count--;}
|
||||
else {
|
||||
fl_miss_count++;
|
||||
#ifndef KERNEL
|
||||
p = (RF_AllocListElem_t *) malloc(sizeof(RF_AllocListElem_t)); /* can't use Malloc at user level b/c we already locked the mutex */
|
||||
#else /* !KERNEL */
|
||||
RF_Malloc(p, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *)); /* no allocation locking in kernel, so this is fine */
|
||||
#endif /* !KERNEL */
|
||||
}
|
||||
#ifndef KERNEL
|
||||
if (lockflag) { RF_UNLOCK_MUTEX(rf_debug_mem_mutex); }
|
||||
#endif /* !KERNEL */
|
||||
if (p == NULL) {
|
||||
return(NULL);
|
||||
}
|
||||
bzero((char *)p, sizeof(RF_AllocListElem_t));
|
||||
return(p);
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/* $NetBSD: rf_alloclist.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* alloclist.h -- header file for alloclist.c
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_alloclist.h,v
|
||||
* Revision 1.11 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.10 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.9 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1995/11/30 16:27:13 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_ALLOCLIST_H_
|
||||
#define _RF__RF_ALLOCLIST_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20
|
||||
|
||||
/*
 * One node of an allocation list: a fixed-capacity batch of recorded
 * pointers, chained to further nodes through `next`.
 */
struct RF_AllocListElem_s {
|
||||
void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; /* recorded pointers; slots [0, numPointers) are in use */
|
||||
int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; /* sizes[i] is the size recorded with pointers[i] (presumably bytes -- confirm) */
|
||||
int numPointers; /* number of slots currently filled in this node */
|
||||
RF_AllocListElem_t *next; /* following node, or NULL at the end of the list */
|
||||
};
|
||||
|
||||
/*
 * Convenience wrappers that always pass lockflag == 1 to the real routines.
 * NOTE(review): rf_MakeAllocList expands to a complete assignment statement
 * including its own trailing ';', and _ptr_ is not parenthesized, so it is
 * only safe as a stand-alone statement with a simple lvalue argument.
 * NOTE(review): both macros are visible even when RF_UTILITY != 0, although
 * the functions they call are only declared when RF_UTILITY == 0.
 */
#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1);
|
||||
#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1)
|
||||
|
||||
/* presumably one-time module setup that hooks teardown onto *listp -- confirm against rf_alloclist.c */
int rf_ConfigureAllocList(RF_ShutdownList_t **listp);
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
/* record pointer p together with its size on list l */
void rf_real_AddToAllocList(RF_AllocListElem_t *l, void *p, int size, int lockflag);
|
||||
/* dispose of list l (presumably also releasing every recorded pointer -- confirm) */
void rf_FreeAllocList(RF_AllocListElem_t *l);
|
||||
/* obtain a new, empty allocation list; callers in rf_aselect.c treat NULL as out-of-memory */
RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag);
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
#endif /* !_RF__RF_ALLOCLIST_H_ */
|
|
@ -0,0 +1,210 @@
|
|||
/* $NetBSD: rf_archs.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_archs.h -- defines for which architectures you want to
|
||||
* include in some particular build of raidframe. Unfortunately,
|
||||
* it's difficult to exclude declustering, P+Q, and distributed
|
||||
* sparing because the code is intermixed with RAID5 code. This
|
||||
* should be fixed.
|
||||
*
|
||||
* this is really intended only for use in the kernel, where I
|
||||
* am worried about the size of the object module. At user level and
|
||||
* in the simulator, I don't really care that much, so all the
|
||||
* architectures can be compiled together. Note that by itself, turning
|
||||
* off these defines does not affect the size of the executable; you
|
||||
* have to edit the makefile for that.
|
||||
*
|
||||
* comment out any line below to eliminate that architecture.
|
||||
* the list below includes all the modules that can be compiled
|
||||
* out.
|
||||
*
|
||||
* :
|
||||
* Log: rf_archs.h,v
|
||||
* Revision 1.32 1996/08/20 23:05:40 jimz
|
||||
* define RF_KEEP_DISKSTATS to 1
|
||||
*
|
||||
* Revision 1.31 1996/07/31 15:34:04 jimz
|
||||
* include evenodd
|
||||
*
|
||||
* Revision 1.30 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.29 1996/07/26 20:11:46 jimz
|
||||
* only define RF_DEMO for CMU_PDL
|
||||
*
|
||||
* Revision 1.28 1996/07/26 20:10:57 jimz
|
||||
* define RF_CMU_PDL only if it isn't already defined
|
||||
*
|
||||
* Revision 1.27 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.26 1996/06/17 14:38:33 jimz
|
||||
* properly #if out RF_DEMO code
|
||||
* fix bug in MakeConfig that was causing weird behavior
|
||||
* in configuration routines (config was not zeroed at start)
|
||||
* clean up genplot handling of stacks
|
||||
*
|
||||
* Revision 1.25 1996/06/14 21:24:59 jimz
|
||||
* turn on RF_CMU_PDL by default
|
||||
*
|
||||
* Revision 1.24 1996/06/13 20:41:57 jimz
|
||||
* add RF_INCLUDE_QUEUE_RANDOM (0)
|
||||
*
|
||||
* Revision 1.23 1996/06/11 18:12:36 jimz
|
||||
* get rid of JOIN operations
|
||||
* use ThreadGroup stuff instead
|
||||
* fix some allocation/deallocation and sync bugs
|
||||
*
|
||||
* Revision 1.22 1996/06/10 22:24:55 wvcii
|
||||
* added symbols for enabling forward or backward error
|
||||
* recovery experiments
|
||||
*
|
||||
* Revision 1.21 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.20 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.19 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.18 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.17 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.16 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.15 1996/05/15 22:32:59 jimz
|
||||
* remove cache and vs stuff
|
||||
*
|
||||
* Revision 1.14 1995/11/30 16:27:34 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.13 1995/11/28 21:23:44 amiri
|
||||
* added the interleaved declustering architecture
|
||||
* ('I'), with distributed sparing.
|
||||
*
|
||||
* Revision 1.12 1995/11/17 16:59:45 amiri
|
||||
* don't INCLUDE_CHAINDECLUSTER in the kernel
|
||||
* source.
|
||||
*
|
||||
* Revision 1.11 1995/11/16 16:15:21 amiri
|
||||
* don't include RAID5 with rotated sparing (INCLUDE_RAID5_RS) in kernel
|
||||
*
|
||||
* Revision 1.10 1995/10/12 17:40:47 jimz
|
||||
* define INCLUDE_LS
|
||||
*
|
||||
* Revision 1.9 1995/10/11 06:56:47 jimz
|
||||
* define INCLUDE_VS (sanity check for compilation)
|
||||
*
|
||||
* Revision 1.8 1995/10/05 18:56:24 jimz
|
||||
* don't INCLUDE_VS
|
||||
*
|
||||
* Revision 1.7 1995/10/04 03:51:20 wvcii
|
||||
* added raid 1
|
||||
*
|
||||
* Revision 1.6 1995/09/07 09:59:29 wvcii
|
||||
* unstable archs conditionally defined for !KERNEL makes
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_ARCHS_H_
|
||||
#define _RF__RF_ARCHS_H_
|
||||
|
||||
/*
|
||||
* Turn off if you do not have CMU PDL support compiled
|
||||
* into your kernel.
|
||||
*/
|
||||
/* Default RF_CMU_PDL to 0 (off) unless the build has already defined it. */
#ifndef RF_CMU_PDL
|
||||
#define RF_CMU_PDL 0
|
||||
#endif /* !RF_CMU_PDL */
|
||||
|
||||
/*
|
||||
* Khalil's performance-displaying demo stuff.
|
||||
* Relies on CMU meter tools.
|
||||
*/
|
||||
/*
 * RF_DEMO is defined only outside the kernel and only when RF_CMU_PDL > 0.
 * NOTE(review): this tests KERNEL, whereas rf_aselect.c tests _KERNEL;
 * confirm which spelling the target kernel build actually defines.
 */
#ifndef KERNEL
|
||||
#if RF_CMU_PDL > 0
|
||||
#define RF_DEMO 1
|
||||
#endif /* RF_CMU_PDL > 0 */
|
||||
#endif /* !KERNEL */
|
||||
|
||||
/*
 * Per-architecture / per-feature build switches.
 * NOTE(review): the header comment above speaks of commenting lines out,
 * but every switch here carries a numeric value, so consumers presumably
 * test them with #if (1 = include, 0 = exclude) -- confirm before relying
 * on #ifdef-style checks.
 */
#define RF_INCLUDE_EVENODD 1
|
||||
|
||||
/* RAID5 with rotated sparing, and parity logging */
#define RF_INCLUDE_RAID5_RS 1
|
||||
#define RF_INCLUDE_PARITYLOGGING 1
|
||||
|
||||
/* chained and interleaved declustering layouts */
#define RF_INCLUDE_CHAINDECLUSTER 1
|
||||
#define RF_INCLUDE_INTERDECLUSTER 1
|
||||
|
||||
/* basic RAID levels; RAID6 and RF_INCLUDE_DECL_PQ are switched off here */
#define RF_INCLUDE_RAID0 1
|
||||
#define RF_INCLUDE_RAID1 1
|
||||
#define RF_INCLUDE_RAID4 1
|
||||
#define RF_INCLUDE_RAID5 1
|
||||
#define RF_INCLUDE_RAID6 0
|
||||
#define RF_INCLUDE_DECL_PQ 0
|
||||
|
||||
/* debugging / statistics switches (exact effect is defined by their users, not visible here) */
#define RF_MEMORY_REDZONES 0
|
||||
#define RF_RECON_STATS 1
|
||||
|
||||
/* presumably selects the random disk-queue policy -- confirm */
#define RF_INCLUDE_QUEUE_RANDOM 0
|
||||
|
||||
/* presumably enables per-disk statistics collection -- confirm */
#define RF_KEEP_DISKSTATS 1
|
||||
|
||||
/* These two symbols enable nonstandard forms of error recovery.
|
||||
* These modes are only valid for performance measurements and
|
||||
* data corruption will occur if an error occurs when either
|
||||
* forward or backward error recovery are enabled. In general
|
||||
* both of the following two definitions should be commented
|
||||
* out--this forces RAIDframe to use roll-away error recovery
|
||||
* which does guarantee proper error recovery without data corruption
|
||||
*/
|
||||
/* #define RF_FORWARD 1 */
|
||||
/* #define RF_BACKWARD 1 */
|
||||
|
||||
#include "rf_options.h"
|
||||
|
||||
#endif /* !_RF__RF_ARCHS_H_ */
|
|
@ -0,0 +1,617 @@
|
|||
/* $NetBSD: rf_aselect.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* aselect.c -- algorithm selection code
|
||||
*
|
||||
*****************************************************************************/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_aselect.c,v
|
||||
* Revision 1.35 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.34 1996/07/27 18:39:39 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.33 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.32 1996/06/12 03:29:40 jimz
|
||||
* Note: things that call InitHdrNode should check
|
||||
* for successful return.
|
||||
*
|
||||
* Revision 1.31 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.30 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.29 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.28 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.27 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.26 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.25 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.24 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.23 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.22 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.21 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.20 1996/05/03 19:45:35 wvcii
|
||||
* removed includes of old deg creation files
|
||||
* updated SelectAlgorithm comments
|
||||
*
|
||||
* Revision 1.19 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.18 1995/11/30 16:27:48 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.17 1995/11/19 16:25:55 wvcii
|
||||
* SelectAlgorithm now creates an array, returned in desc->dagArray
|
||||
* return value is now int (1 = FAIL)
|
||||
*
|
||||
* Revision 1.16 1995/11/17 15:09:58 wvcii
|
||||
* fixed bug in SelectAlgorithm in which multiple graphs per stripe are required
|
||||
*
|
||||
* Revision 1.15 1995/11/07 17:12:42 wvcii
|
||||
* changed SelectAlgorithm as follows:
|
||||
*
|
||||
* dag creation funcs now create term nodes
|
||||
* dag selection funcs no longer return numHdrSucc, numTermAnt
|
||||
* there is now one dag hdr for each dag in a request, implying
|
||||
* that SelectAlgorithm now returns a linked list of dag hdrs
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_desc.h"
|
||||
#include "rf_map.h"
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* the function below is not used... so don't define it! */
|
||||
#else
|
||||
static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *);
|
||||
#endif
|
||||
|
||||
static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int);
|
||||
static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *);
|
||||
int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t );
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* Create and Initialize a dag header and termination node
|
||||
*
|
||||
*****************************************************************************/
|
||||
static int InitHdrNode(hdr, raidPtr, memChunkEnable)
|
||||
RF_DagHeader_t **hdr;
|
||||
RF_Raid_t *raidPtr;
|
||||
int memChunkEnable;
|
||||
{
|
||||
/* create and initialize dag hdr */
|
||||
*hdr = rf_AllocDAGHeader();
|
||||
rf_MakeAllocList((*hdr)->allocList);
|
||||
if ((*hdr)->allocList == NULL) {
|
||||
rf_FreeDAGHeader(*hdr);
|
||||
return(ENOMEM);
|
||||
}
|
||||
(*hdr)->status = rf_enable;
|
||||
(*hdr)->numSuccedents = 0;
|
||||
(*hdr)->raidPtr = raidPtr;
|
||||
(*hdr)->next = NULL;
|
||||
return(0);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* Transfer allocation list and mem chunks from one dag to another
|
||||
*
|
||||
*****************************************************************************/
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* the function below is not used... so don't define it! */
|
||||
#else
|
||||
static void TransferDagMemory(daga, dagb)
|
||||
RF_DagHeader_t *daga;
|
||||
RF_DagHeader_t *dagb;
|
||||
{
|
||||
RF_AccessStripeMapHeader_t *end;
|
||||
RF_AllocListElem_t *p;
|
||||
int i, memChunksXfrd = 0, xtraChunksXfrd = 0;
|
||||
|
||||
/* transfer allocList from dagb to daga */
|
||||
for (p = dagb->allocList; p ; p = p->next)
|
||||
{
|
||||
for (i = 0; i < p->numPointers; i++)
|
||||
{
|
||||
rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]);
|
||||
p->pointers[i] = NULL;
|
||||
p->sizes[i] = 0;
|
||||
}
|
||||
p->numPointers = 0;
|
||||
}
|
||||
|
||||
/* transfer chunks from dagb to daga */
|
||||
while ((memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) && (daga->chunkIndex < RF_MAXCHUNKS))
|
||||
{
|
||||
/* stuff chunks into daga's memChunk array */
|
||||
if (memChunksXfrd < dagb->chunkIndex)
|
||||
{
|
||||
daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd];
|
||||
dagb->memChunk[memChunksXfrd++] = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
daga->memChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
|
||||
dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
|
||||
}
|
||||
}
|
||||
/* use escape hatch to hold excess chunks */
|
||||
while (memChunksXfrd + xtraChunksXfrd < dagb->chunkIndex + dagb->xtraChunkIndex) {
|
||||
if (memChunksXfrd < dagb->chunkIndex)
|
||||
{
|
||||
daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd];
|
||||
dagb->memChunk[memChunksXfrd++] = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
|
||||
dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
|
||||
}
|
||||
}
|
||||
RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex));
|
||||
RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS);
|
||||
RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt);
|
||||
dagb->chunkIndex = 0;
|
||||
dagb->xtraChunkIndex = 0;
|
||||
|
||||
/* transfer asmList from dagb to daga */
|
||||
if (dagb->asmList)
|
||||
{
|
||||
if (daga->asmList)
|
||||
{
|
||||
end = daga->asmList;
|
||||
while (end->next)
|
||||
end = end->next;
|
||||
end->next = dagb->asmList;
|
||||
}
|
||||
else
|
||||
daga->asmList = dagb->asmList;
|
||||
dagb->asmList = NULL;
|
||||
}
|
||||
}
|
||||
#endif /* __NetBSD__ */
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* Ensure that all node->dagHdr fields in a dag are consistent
|
||||
*
|
||||
* IMPORTANT: This routine recursively searches all succedents of the node. If a
|
||||
* succedent is encountered whose dagHdr ptr does not require adjusting, that node's
|
||||
* succedents WILL NOT BE EXAMINED.
|
||||
*
|
||||
****************************************************************************************/
|
||||
static void UpdateNodeHdrPtr(hdr, node)
|
||||
RF_DagHeader_t *hdr;
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
int i;
|
||||
RF_ASSERT(hdr != NULL && node != NULL);
|
||||
for (i = 0; i < node->numSuccedents; i++)
|
||||
if (node->succedents[i]->dagHdr != hdr)
|
||||
UpdateNodeHdrPtr(hdr, node->succedents[i]);
|
||||
node->dagHdr = hdr;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* Create a DAG to do a read or write operation.
|
||||
*
|
||||
* create an array of dagLists, one list per parity stripe.
|
||||
* return the lists in the array desc->dagArray.
|
||||
*
|
||||
* Normally, each list contains one dag for the entire stripe. In some
|
||||
* tricky cases, we break this into multiple dags, either one per stripe
|
||||
* unit or one per block (sector). When this occurs, these dags are returned
|
||||
* as a linked list (dagList) which is executed sequentially (to preserve
|
||||
* atomic parity updates in the stripe).
|
||||
*
|
||||
* dags which operate on independent parity groups (stripes) are returned in
|
||||
* independent dagLists (distinct elements in desc->dagArray) and may be
|
||||
* executed concurrently.
|
||||
*
|
||||
* Finally, if the SelectionFunc fails to create a dag for a block, we punt
|
||||
* and return 1.
|
||||
*
|
||||
* The above process is performed in two phases:
|
||||
* 1) create an array(s) of creation functions (eg stripeFuncs)
|
||||
* 2) create dags and concatenate/merge to form the final dag.
|
||||
*
|
||||
* Because dag's are basic blocks (single entry, single exit, unconditional
|
||||
* control flow), we can add the following optimizations (future work):
|
||||
* first-pass optimizer to allow max concurrency (need all data dependencies)
|
||||
* second-pass optimizer to eliminate common subexpressions (need true
|
||||
* data dependencies)
|
||||
* third-pass optimizer to eliminate dead code (need true data dependencies)
|
||||
*****************************************************************************/
|
||||
|
||||
#define MAXNSTRIPES 50
|
||||
|
||||
int rf_SelectAlgorithm(desc, flags)
|
||||
RF_RaidAccessDesc_t *desc;
|
||||
RF_RaidAccessFlags_t flags;
|
||||
{
|
||||
RF_AccessStripeMapHeader_t *asm_h = desc->asmap;
|
||||
RF_IoType_t type = desc->type;
|
||||
RF_Raid_t *raidPtr = desc->raidPtr;
|
||||
void *bp = desc->bp;
|
||||
|
||||
RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
|
||||
RF_AccessStripeMap_t *asm_p;
|
||||
RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h;
|
||||
int i, j, k;
|
||||
RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES];
|
||||
RF_AccessStripeMap_t *asm_up, *asm_bp;
|
||||
RF_AccessStripeMapHeader_t ***asmh_u, *endASMList;
|
||||
RF_AccessStripeMapHeader_t ***asmh_b;
|
||||
RF_VoidFuncPtr **stripeUnitFuncs, uFunc;
|
||||
RF_VoidFuncPtr **blockFuncs, bFunc;
|
||||
int numStripesBailed = 0, cantCreateDAGs = RF_FALSE;
|
||||
int numStripeUnitsBailed = 0;
|
||||
int stripeNum, numUnitDags = 0, stripeUnitNum, numBlockDags = 0;
|
||||
RF_StripeNum_t numStripeUnits;
|
||||
RF_SectorNum_t numBlocks;
|
||||
RF_RaidAddr_t address;
|
||||
int length;
|
||||
RF_PhysDiskAddr_t *physPtr;
|
||||
caddr_t buffer;
|
||||
|
||||
lastdag_h = NULL;
|
||||
asmh_u = asmh_b = NULL;
|
||||
stripeUnitFuncs = NULL;
|
||||
blockFuncs = NULL;
|
||||
|
||||
/* get an array of dag-function creation pointers, try to avoid calling malloc */
|
||||
if (asm_h->numStripes <= MAXNSTRIPES) stripeFuncs = normalStripeFuncs;
|
||||
else RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
|
||||
|
||||
/* walk through the asm list once collecting information */
|
||||
/* attempt to find a single creation function for each stripe */
|
||||
desc->numStripes = 0;
|
||||
for (i=0,asm_p = asmap; asm_p; asm_p=asm_p->next,i++) {
|
||||
desc->numStripes++;
|
||||
(raidPtr->Layout.map->SelectionFunc)(raidPtr, type, asm_p, &stripeFuncs[i]);
|
||||
/* check to see if we found a creation func for this stripe */
|
||||
if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL)
|
||||
{
|
||||
/* could not find creation function for entire stripe
|
||||
so, let's see if we can find one for each stripe unit in the stripe */
|
||||
|
||||
if (numStripesBailed == 0)
|
||||
{
|
||||
/* one stripe map header for each stripe we bail on */
|
||||
RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***));
|
||||
/* create an array of ptrs to arrays of stripeFuncs */
|
||||
RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
|
||||
}
|
||||
|
||||
/* create an array of creation funcs (called stripeFuncs) for this stripe */
|
||||
numStripeUnits = asm_p->numStripeUnitsAccessed;
|
||||
RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
|
||||
RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
|
||||
|
||||
/* lookup array of stripeUnitFuncs for this stripe */
|
||||
for (j=0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++)
|
||||
{
|
||||
/* remap for series of single stripe-unit accesses */
|
||||
address = physPtr->raidAddress;
|
||||
length = physPtr->numSector;
|
||||
buffer = physPtr->bufPtr;
|
||||
|
||||
asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
|
||||
asm_up = asmh_u[numStripesBailed][j]->stripeMap;
|
||||
|
||||
/* get the creation func for this stripe unit */
|
||||
(raidPtr->Layout.map-> SelectionFunc)(raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j]));
|
||||
|
||||
/* check to see if we found a creation func for this stripe unit */
|
||||
if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL)
|
||||
{
|
||||
/* could not find creation function for stripe unit so,
|
||||
let's see if we can find one for each block in the stripe unit */
|
||||
if (numStripeUnitsBailed == 0)
|
||||
{
|
||||
/* one stripe map header for each stripe unit we bail on */
|
||||
RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***));
|
||||
/* create an array of ptrs to arrays of blockFuncs */
|
||||
RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
|
||||
}
|
||||
|
||||
/* create an array of creation funcs (called blockFuncs) for this stripe unit */
|
||||
numBlocks = physPtr->numSector;
|
||||
numBlockDags += numBlocks;
|
||||
RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
|
||||
RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
|
||||
|
||||
/* lookup array of blockFuncs for this stripe unit */
|
||||
for (k=0; k < numBlocks; k++)
|
||||
{
|
||||
/* remap for series of single stripe-unit accesses */
|
||||
address = physPtr->raidAddress + k;
|
||||
length = 1;
|
||||
buffer = physPtr->bufPtr + (k * (1<<raidPtr->logBytesPerSector));
|
||||
|
||||
asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
|
||||
asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap;
|
||||
|
||||
/* get the creation func for this stripe unit */
|
||||
(raidPtr->Layout.map-> SelectionFunc)(raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k]));
|
||||
|
||||
/* check to see if we found a creation func for this stripe unit */
|
||||
if (blockFuncs[numStripeUnitsBailed][k] == NULL)
|
||||
cantCreateDAGs = RF_TRUE;
|
||||
}
|
||||
numStripeUnitsBailed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
numUnitDags++;
|
||||
}
|
||||
}
|
||||
RF_ASSERT(j == numStripeUnits);
|
||||
numStripesBailed++;
|
||||
}
|
||||
}
|
||||
|
||||
if (cantCreateDAGs)
|
||||
{
|
||||
/* free memory and punt */
|
||||
if (asm_h->numStripes > MAXNSTRIPES)
|
||||
RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
|
||||
if (numStripesBailed > 0)
|
||||
{
|
||||
stripeNum = 0;
|
||||
for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++)
|
||||
if (stripeFuncs[i] == NULL)
|
||||
{
|
||||
numStripeUnits = asm_p->numStripeUnitsAccessed;
|
||||
for (j = 0; j < numStripeUnits; j++)
|
||||
rf_FreeAccessStripeMap(asmh_u[stripeNum][j]);
|
||||
RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
|
||||
RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
|
||||
stripeNum++;
|
||||
}
|
||||
RF_ASSERT(stripeNum == numStripesBailed);
|
||||
RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
|
||||
RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
|
||||
}
|
||||
return(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* begin dag creation */
|
||||
stripeNum = 0;
|
||||
stripeUnitNum = 0;
|
||||
|
||||
/* create an array of dagLists and fill them in */
|
||||
RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList);
|
||||
|
||||
for (i=0, asm_p = asmap; asm_p; asm_p=asm_p->next,i++) {
|
||||
/* grab dag header for this stripe */
|
||||
dag_h = NULL;
|
||||
desc->dagArray[i].desc = desc;
|
||||
|
||||
if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL)
|
||||
{
|
||||
/* use bailout functions for this stripe */
|
||||
for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr=physPtr->next, j++)
|
||||
{
|
||||
uFunc = stripeUnitFuncs[stripeNum][j];
|
||||
if (uFunc == (RF_VoidFuncPtr) NULL)
|
||||
{
|
||||
/* use bailout functions for this stripe unit */
|
||||
for (k = 0; k < physPtr->numSector; k++)
|
||||
{
|
||||
/* create a dag for this block */
|
||||
InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
|
||||
desc->dagArray[i].numDags++;
|
||||
if (dag_h == NULL) {
|
||||
dag_h = tempdag_h;
|
||||
}
|
||||
else {
|
||||
lastdag_h->next = tempdag_h;
|
||||
}
|
||||
lastdag_h = tempdag_h;
|
||||
|
||||
bFunc = blockFuncs[stripeUnitNum][k];
|
||||
RF_ASSERT(bFunc);
|
||||
asm_bp = asmh_b[stripeUnitNum][k]->stripeMap;
|
||||
(*bFunc)(raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList);
|
||||
}
|
||||
stripeUnitNum++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* create a dag for this unit */
|
||||
InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
|
||||
desc->dagArray[i].numDags++;
|
||||
if (dag_h == NULL) {
|
||||
dag_h = tempdag_h;
|
||||
}
|
||||
else {
|
||||
lastdag_h->next = tempdag_h;
|
||||
}
|
||||
lastdag_h = tempdag_h;
|
||||
|
||||
asm_up = asmh_u[stripeNum][j]->stripeMap;
|
||||
(*uFunc)(raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList);
|
||||
}
|
||||
}
|
||||
RF_ASSERT(j == asm_p->numStripeUnitsAccessed);
|
||||
/* merge linked bailout dag to existing dag collection */
|
||||
stripeNum++;
|
||||
}
|
||||
else {
|
||||
/* Create a dag for this parity stripe */
|
||||
InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
|
||||
desc->dagArray[i].numDags++;
|
||||
if (dag_h == NULL) {
|
||||
dag_h = tempdag_h;
|
||||
}
|
||||
else {
|
||||
lastdag_h->next = tempdag_h;
|
||||
}
|
||||
lastdag_h = tempdag_h;
|
||||
|
||||
(stripeFuncs[i])(raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList);
|
||||
}
|
||||
desc->dagArray[i].dags = dag_h;
|
||||
}
|
||||
RF_ASSERT(i == desc->numStripes);
|
||||
|
||||
/* free memory */
|
||||
if (asm_h->numStripes > MAXNSTRIPES)
|
||||
RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
|
||||
if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0))
|
||||
{
|
||||
stripeNum = 0;
|
||||
stripeUnitNum = 0;
|
||||
if (dag_h->asmList)
|
||||
{
|
||||
endASMList = dag_h->asmList;
|
||||
while (endASMList->next)
|
||||
endASMList = endASMList->next;
|
||||
}
|
||||
else
|
||||
endASMList = NULL;
|
||||
/* walk through io, stripe by stripe */
|
||||
for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++)
|
||||
if (stripeFuncs[i] == NULL)
|
||||
{
|
||||
numStripeUnits = asm_p->numStripeUnitsAccessed;
|
||||
/* walk through stripe, stripe unit by stripe unit */
|
||||
for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++)
|
||||
{
|
||||
if (stripeUnitFuncs[stripeNum][j] == NULL)
|
||||
{
|
||||
numBlocks = physPtr->numSector;
|
||||
/* walk through stripe unit, block by block */
|
||||
for (k = 0; k < numBlocks; k++)
|
||||
if (dag_h->asmList == NULL)
|
||||
{
|
||||
dag_h->asmList = asmh_b[stripeUnitNum][k];
|
||||
endASMList = dag_h->asmList;
|
||||
}
|
||||
else
|
||||
{
|
||||
endASMList->next = asmh_b[stripeUnitNum][k];
|
||||
endASMList = endASMList->next;
|
||||
}
|
||||
RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *));
|
||||
RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr));
|
||||
stripeUnitNum++;
|
||||
}
|
||||
if (dag_h->asmList == NULL)
|
||||
{
|
||||
dag_h->asmList = asmh_u[stripeNum][j];
|
||||
endASMList = dag_h->asmList;
|
||||
}
|
||||
else
|
||||
{
|
||||
endASMList->next = asmh_u[stripeNum][j];
|
||||
endASMList = endASMList->next;
|
||||
}
|
||||
}
|
||||
RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
|
||||
RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
|
||||
stripeNum++;
|
||||
}
|
||||
RF_ASSERT(stripeNum == numStripesBailed);
|
||||
RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
|
||||
RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
|
||||
if (numStripeUnitsBailed > 0)
|
||||
{
|
||||
RF_ASSERT(stripeUnitNum == numStripeUnitsBailed);
|
||||
RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr));
|
||||
RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/* $NetBSD: rf_aselect.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* aselect.h -- header file for algorithm selection code
|
||||
*
|
||||
*****************************************************************************/
|
||||
/* :
|
||||
* Log: rf_aselect.h,v
|
||||
* Revision 1.5 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.4 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.3 1995/11/30 16:28:00 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.2 1995/11/19 16:20:46 wvcii
|
||||
* changed SelectAlgorithm prototype
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_ASELECT_H_
|
||||
#define _RF__RF_ASELECT_H_
|
||||
|
||||
#include "rf_desc.h"
|
||||
|
||||
/*
 * Entry point of the algorithm selection code for the access described
 * by desc, with the given access flags.
 * NOTE(review): the meaning of the int result is not visible from this
 * header -- presumably 0 on success; confirm against rf_aselect.c.
 */
int rf_SelectAlgorithm(RF_RaidAccessDesc_t *desc, RF_RaidAccessFlags_t flags);
|
||||
|
||||
#endif /* !_RF__RF_ASELECT_H_ */
|
|
@ -0,0 +1,120 @@
|
|||
/* $NetBSD: rf_callback.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* callback.c -- code to manipulate callback descriptor
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_callback.c,v
|
||||
* Revision 1.11 1996/06/17 03:18:04 jimz
|
||||
* include shutdown.h for macroized ShutdownCreate
|
||||
*
|
||||
* Revision 1.10 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.9 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1996/05/17 16:30:41 jimz
|
||||
* convert to RF_FREELIST stuff
|
||||
*
|
||||
* Revision 1.6 1995/12/01 15:16:04 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KERNEL
|
||||
#ifdef __NetBSD__
|
||||
#include <unistd.h>
|
||||
#endif /* __NetBSD__ */
|
||||
#endif
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_callback.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_freelist.h"
|
||||
#include "rf_shutdown.h"
|
||||
|
||||
/*
 * Tuning knobs for the callback-descriptor freelist.
 *
 * RF_MAX_FREE_CALLBACK and RF_CALLBACK_INC are handed to
 * RF_FREELIST_CREATE (2nd and 3rd arguments) in rf_ConfigureCallback;
 * RF_CALLBACK_INITIAL is the count handed to RF_FREELIST_PRIME.
 * NOTE(review): presumably "max descriptors kept free", "growth
 * increment" and "descriptors preallocated at configure time" --
 * confirm against rf_freelist.h.
 */
#define RF_MAX_FREE_CALLBACK 64
#define RF_CALLBACK_INC       4
#define RF_CALLBACK_INITIAL   4

/* Freelist that backs rf_AllocCallbackDesc() / rf_FreeCallbackDesc(). */
static RF_FreeList_t *rf_callback_freelist;
|
||||
|
||||
static void rf_ShutdownCallback(void *);
|
||||
static void rf_ShutdownCallback(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_FREELIST_DESTROY(rf_callback_freelist,next,(RF_CallbackDesc_t *));
|
||||
}
|
||||
|
||||
int rf_ConfigureCallback(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK,
|
||||
RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t));
|
||||
if (rf_callback_freelist == NULL)
|
||||
return(ENOMEM);
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
rf_ShutdownCallback(NULL);
|
||||
return(rc);
|
||||
}
|
||||
RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL,next,
|
||||
(RF_CallbackDesc_t *));
|
||||
return(0);
|
||||
}
|
||||
|
||||
RF_CallbackDesc_t *rf_AllocCallbackDesc()
|
||||
{
|
||||
RF_CallbackDesc_t *p;
|
||||
|
||||
RF_FREELIST_GET(rf_callback_freelist,p,next,(RF_CallbackDesc_t *));
|
||||
return(p);
|
||||
}
|
||||
|
||||
void rf_FreeCallbackDesc(p)
|
||||
RF_CallbackDesc_t *p;
|
||||
{
|
||||
RF_FREELIST_FREE(rf_callback_freelist,p,next);
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/* $NetBSD: rf_callback.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* callback.h -- header file for callback.c
|
||||
*
|
||||
* the reconstruction code must manage concurrent I/Os on multiple drives.
|
||||
* it sometimes needs to suspend operation on a particular drive until some
|
||||
* condition occurs. we can't block the thread, of course, or we wouldn't
|
||||
* be able to manage our other outstanding I/Os. Instead we just suspend
|
||||
* new activity on the indicated disk, and create a callback descriptor and
|
||||
* put it someplace where it will get invoked when the condition that's
|
||||
* stalling us has cleared. When the descriptor is invoked, it will call
|
||||
* a function that will restart operation on the indicated disk.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_callback.h,v
|
||||
* Revision 1.8 1996/08/01 15:57:28 jimz
|
||||
* minor cleanup
|
||||
*
|
||||
* Revision 1.7 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.6 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.5 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.4 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.3 1996/05/17 16:30:46 jimz
|
||||
* add prototypes
|
||||
*
|
||||
* Revision 1.2 1995/12/01 15:15:55 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CALLBACK_H_
|
||||
#define _RF__RF_CALLBACK_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
struct RF_CallbackDesc_s {
|
||||
void (*callbackFunc)(RF_CBParam_t); /* function to call */
|
||||
RF_CBParam_t callbackArg; /* args to give to function, or just info about this callback */
|
||||
RF_CBParam_t callbackArg2;
|
||||
RF_RowCol_t row; /* disk row and column IDs to give to the callback func */
|
||||
RF_RowCol_t col;
|
||||
RF_CallbackDesc_t *next; /* next entry in list */
|
||||
};
|
||||
|
||||
int rf_ConfigureCallback(RF_ShutdownList_t **listp);
|
||||
RF_CallbackDesc_t *rf_AllocCallbackDesc(void);
|
||||
void rf_FreeCallbackDesc(RF_CallbackDesc_t *p);
|
||||
|
||||
#endif /* !_RF__RF_CALLBACK_H_ */
|
|
@ -0,0 +1,114 @@
|
|||
/* $NetBSD: rf_ccmn.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_ccmn.h
|
||||
* header file that declares the ccmn routines, and includes
|
||||
* the files needed to use them.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_ccmn.h,v
|
||||
* Revision 1.4 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.3 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.2 1995/12/01 15:16:45 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CCMN_H_
|
||||
#define _RF__RF_CCMN_H_
|
||||
|
||||
#ifdef __osf__
|
||||
#include <sys/errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <io/common/iotypes.h>
|
||||
#include <io/cam/cam_debug.h>
|
||||
#include <io/cam/cam.h>
|
||||
#include <io/cam/dec_cam.h>
|
||||
#include <io/cam/uagt.h>
|
||||
#include <io/cam/scsi_all.h>
|
||||
#include <io/cam/scsi_direct.h>
|
||||
|
||||
#ifdef KERNEL
|
||||
#include <sys/conf.h>
|
||||
#include <sys/mtio.h>
|
||||
#include <io/common/devio.h>
|
||||
#include <io/common/devdriver.h>
|
||||
#include <io/cam/scsi_status.h>
|
||||
#include <io/cam/pdrv.h>
|
||||
#include <io/common/pt.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <io/cam/cam_disk.h>
|
||||
#include <io/cam/ccfg.h>
|
||||
|
||||
extern void ccmn_init();
|
||||
extern long ccmn_open_unit();
|
||||
extern void ccmn_close_unit();
|
||||
extern u_long ccmn_send_ccb();
|
||||
extern void ccmn_rem_ccb();
|
||||
extern void ccmn_abort_que();
|
||||
extern void ccmn_term_que();
|
||||
extern CCB_HEADER *ccmn_get_ccb();
|
||||
extern void ccmn_rel_ccb();
|
||||
extern CCB_SCSIIO *ccmn_io_ccb_bld();
|
||||
extern CCB_GETDEV *ccmn_gdev_ccb_bld();
|
||||
extern CCB_SETDEV *ccmn_sdev_ccb_bld();
|
||||
extern CCB_SETASYNC *ccmn_sasy_ccb_bld();
|
||||
extern CCB_RELSIM *ccmn_rsq_ccb_bld();
|
||||
extern CCB_PATHINQ *ccmn_pinq_ccb_bld();
|
||||
extern CCB_ABORT *ccmn_abort_ccb_bld();
|
||||
extern CCB_TERMIO *ccmn_term_ccb_bld();
|
||||
extern CCB_RESETDEV *ccmn_bdr_ccb_bld();
|
||||
extern CCB_RESETBUS *ccmn_br_ccb_bld();
|
||||
extern CCB_SCSIIO *ccmn_tur();
|
||||
extern CCB_SCSIIO *ccmn_mode_select();
|
||||
extern u_long ccmn_ccb_status();
|
||||
extern struct buf *ccmn_get_bp();
|
||||
extern void ccmn_rel_bp();
|
||||
extern u_char *ccmn_get_dbuf();
|
||||
extern void ccmn_rel_dbuf();
|
||||
|
||||
extern struct device *camdinfo[];
|
||||
extern struct controller *camminfo[];
|
||||
extern PDRV_UNIT_ELEM pdrv_unit_table[];
|
||||
|
||||
#endif /* KERNEL */
|
||||
#endif /* __osf__ */
|
||||
|
||||
#endif /* !_RF__RF_CCMN_H_ */
|
|
@ -0,0 +1,381 @@
|
|||
/* $NetBSD: rf_chaindecluster.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* rf_chaindecluster.c -- implements chained declustering
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_chaindecluster.c,v
|
||||
* Revision 1.33 1996/08/02 13:20:34 jimz
|
||||
* get rid of bogus (long) casts
|
||||
*
|
||||
* Revision 1.32 1996/07/31 16:56:18 jimz
|
||||
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
|
||||
*
|
||||
* Revision 1.31 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.30 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.29 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.28 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.27 1996/06/11 15:19:57 wvcii
|
||||
* added include of rf_chaindecluster.h
|
||||
* fixed parameter list of rf_ConfigureChainDecluster
|
||||
*
|
||||
* Revision 1.26 1996/06/11 08:55:15 jimz
|
||||
* improved error-checking at configuration time
|
||||
*
|
||||
* Revision 1.25 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.24 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.23 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.22 1996/06/06 17:31:30 jimz
|
||||
* use CreateMirrorPartitionReadDAG for mirrored reads
|
||||
*
|
||||
* Revision 1.21 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.20 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.19 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.18 1996/05/31 16:13:28 amiri
|
||||
 * removed/added some comments.
|
||||
*
|
||||
* Revision 1.17 1996/05/31 05:01:52 amiri
|
||||
* fixed a bug related to sparing layout.
|
||||
*
|
||||
* Revision 1.16 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.15 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.14 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.13 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.12 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.11 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.10 1996/05/03 19:53:56 wvcii
|
||||
* removed include of rf_redstripe.h
|
||||
* moved dag creation routines to new dag library
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_chaindecluster.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagffrd.h"
|
||||
#include "rf_dagffwr.h"
|
||||
#include "rf_dagdegrd.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_utils.h"
|
||||
|
||||
typedef struct RF_ChaindeclusterConfigInfo_s {
|
||||
RF_RowCol_t **stripeIdentifier; /* filled in at config time
|
||||
* and used by IdentifyStripe */
|
||||
RF_StripeCount_t numSparingRegions;
|
||||
RF_StripeCount_t stripeUnitsPerSparingRegion;
|
||||
RF_SectorNum_t mirrorStripeOffset;
|
||||
} RF_ChaindeclusterConfigInfo_t;
|
||||
|
||||
int rf_ConfigureChainDecluster(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_StripeCount_t num_used_stripeUnitsPerDisk;
|
||||
RF_ChaindeclusterConfigInfo_t *info;
|
||||
RF_RowCol_t i;
|
||||
|
||||
/* create a Chained Declustering configuration structure */
|
||||
RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
|
||||
if (info == NULL)
|
||||
return(ENOMEM);
|
||||
layoutPtr->layoutSpecificInfo = (void *) info;
|
||||
|
||||
/* fill in the config structure. */
|
||||
info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2 , raidPtr->cleanupList);
|
||||
if (info->stripeIdentifier == NULL)
|
||||
return(ENOMEM);
|
||||
for (i=0; i< raidPtr->numCol; i++) {
|
||||
info->stripeIdentifier[i][0] = i % raidPtr->numCol;
|
||||
info->stripeIdentifier[i][1] = (i+1) % raidPtr->numCol;
|
||||
}
|
||||
|
||||
RF_ASSERT(raidPtr->numRow == 1);
|
||||
|
||||
/* fill in the remaining layout parameters */
|
||||
num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
|
||||
(2*raidPtr->numCol-2) );
|
||||
info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol-2);
|
||||
info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
|
||||
info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol-1);
|
||||
layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
|
||||
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
|
||||
layoutPtr->numDataCol = 1;
|
||||
layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->numParityCol = 1;
|
||||
|
||||
layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
|
||||
|
||||
raidPtr->sectorsPerDisk =
|
||||
num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
raidPtr->totalSectors =
|
||||
(layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(raidPtr)
|
||||
RF_Raid_t *raidPtr;
|
||||
{
|
||||
RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
|
||||
/*
|
||||
* The layout uses two stripe units per disk as spare within each
|
||||
* sparing region.
|
||||
*/
|
||||
return (2*info->numSparingRegions);
|
||||
}
|
||||
|
||||
|
||||
/* Maps to the primary copy of the data, i.e. the first mirror pair */
|
||||
void rf_MapSectorChainDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
RF_SectorNum_t index_within_region, index_within_disk;
|
||||
RF_StripeNum_t sparing_region_id;
|
||||
int col_before_remap;
|
||||
|
||||
*row = 0;
|
||||
sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
|
||||
index_within_region = SUID % info->stripeUnitsPerSparingRegion;
|
||||
index_within_disk = index_within_region / raidPtr->numCol;
|
||||
col_before_remap = SUID % raidPtr->numCol;
|
||||
|
||||
if (!remap) {
|
||||
*col = col_before_remap;
|
||||
*diskSector = ( index_within_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) *
|
||||
raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
else {
|
||||
/* remap sector to spare space...*/
|
||||
*diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
index_within_disk = index_within_region / raidPtr->numCol;
|
||||
if (index_within_disk < col_before_remap )
|
||||
*col = index_within_disk;
|
||||
else if (index_within_disk == raidPtr->numCol-2 ) {
|
||||
*col = (col_before_remap+raidPtr->numCol-1) % raidPtr->numCol;
|
||||
*diskSector += raidPtr->Layout.sectorsPerStripeUnit;
|
||||
}
|
||||
else
|
||||
*col = (index_within_disk + 2) % raidPtr->numCol;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained
|
||||
in the next disk (mod numCol) after the disk containing the primary copy.
|
||||
The offset into the disk is one-half disk down */
|
||||
void rf_MapParityChainDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
RF_SectorNum_t index_within_region, index_within_disk;
|
||||
RF_StripeNum_t sparing_region_id;
|
||||
int col_before_remap;
|
||||
|
||||
*row = 0;
|
||||
if (!remap) {
|
||||
*col = SUID % raidPtr->numCol;
|
||||
*col = (*col + 1) % raidPtr->numCol;
|
||||
*diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += ( SUID / raidPtr->numCol ) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
else {
|
||||
/* remap parity to spare space ... */
|
||||
sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
|
||||
index_within_region = SUID % info->stripeUnitsPerSparingRegion;
|
||||
index_within_disk = index_within_region / raidPtr->numCol;
|
||||
*diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
col_before_remap = SUID % raidPtr->numCol;
|
||||
if (index_within_disk < col_before_remap)
|
||||
*col = index_within_disk;
|
||||
else if (index_within_disk == raidPtr->numCol-2 ) {
|
||||
*col = (col_before_remap+2) % raidPtr->numCol;
|
||||
*diskSector -= raidPtr->Layout.sectorsPerStripeUnit;
|
||||
}
|
||||
else
|
||||
*col = (index_within_disk + 2) % raidPtr->numCol;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void rf_IdentifyStripeChainDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids,
|
||||
RF_RowCol_t *outRow)
|
||||
{
|
||||
RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID;
|
||||
RF_RowCol_t col;
|
||||
|
||||
SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
col = SUID % raidPtr->numCol;
|
||||
*outRow = 0;
|
||||
*diskids = info->stripeIdentifier[ col ];
|
||||
}
|
||||
|
||||
void rf_MapSIDToPSIDChainDecluster(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID,
|
||||
RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru)
|
||||
{
|
||||
*which_ru = 0;
|
||||
*psID = stripeID;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* select a graph to perform a single-stripe access
|
||||
*
|
||||
* Parameters: raidPtr - description of the physical array
|
||||
* type - type of operation (read or write) requested
|
||||
* asmap - logical & physical addresses for this access
|
||||
* createFunc - function to use to create the graph (return value)
|
||||
*****************************************************************************/
|
||||
|
||||
void rf_RAIDCDagSelect(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_VoidFuncPtr *createFunc)
|
||||
#if 0
|
||||
void (**createFunc)(RF_Raid_t *, RF_AccessStripeMap_t *,
|
||||
RF_DagHeader_t *, void *, RF_RaidAccessFlags_t,
|
||||
RF_AllocListElem_t *))
|
||||
#endif
|
||||
{
|
||||
RF_ASSERT(RF_IO_IS_R_OR_W(type));
|
||||
RF_ASSERT(raidPtr->numRow == 1);
|
||||
|
||||
if (asmap->numDataFailed + asmap->numParityFailed > 1) {
|
||||
RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
|
||||
*createFunc = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
*createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG :(RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
|
||||
|
||||
if (type == RF_IO_TYPE_READ) {
|
||||
if ( ( raidPtr->status[0] == rf_rs_degraded ) || ( raidPtr->status[0] == rf_rs_reconstructing) )
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateRaidCDegradedReadDAG; /* array status is degraded, implement workload shifting */
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; /* array status not degraded, so use mirror partition dag */
|
||||
}
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
/* $NetBSD: rf_chaindecluster.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_chaindecluster.h
|
||||
* header file for Chained Declustering
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_chaindecluster.h,v
|
||||
* Revision 1.14 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.13 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.12 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.11 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.10 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.9 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.8 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.7 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.6 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.5 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.4 1996/02/22 16:45:59 amiri
|
||||
* added declaration of dag selection function
|
||||
*
|
||||
* Revision 1.3 1995/12/01 15:16:56 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.2 1995/11/17 19:55:21 amiri
|
||||
* prototyped MapParityChainDecluster
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CHAINDECLUSTER_H_
|
||||
#define _RF__RF_CHAINDECLUSTER_H_
|
||||
|
||||
int rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr);
|
||||
void rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
void rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru);
|
||||
void rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_VoidFuncPtr *);
|
||||
#if 0
|
||||
void (**createFunc)(RF_Raid_t *,
|
||||
RF_AccessStripeMap_t *,
|
||||
RF_DagHeader_t *,
|
||||
void *,
|
||||
RF_RaidAccessFlags_t,
|
||||
RF_AllocListElem_t *)
|
||||
);
|
||||
#endif
|
||||
|
||||
#endif /* !_RF__RF_CHAINDECLUSTER_H_ */
|
|
@ -0,0 +1,126 @@
|
|||
/* $NetBSD: rf_configure.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/********************************
|
||||
*
|
||||
* rf_configure.h
|
||||
*
|
||||
* header file for raidframe configuration in the kernel version only.
|
||||
* configuration is invoked via ioctl rather than at boot time
|
||||
*
|
||||
*******************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_configure.h,v
|
||||
* Revision 1.16 1996/06/19 14:57:53 jimz
|
||||
* move layout-specific config parsing hooks into RF_LayoutSW_t
|
||||
* table in rf_layout.c
|
||||
*
|
||||
* Revision 1.15 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.14 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.13 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.12 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.11 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.10 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.9 1996/05/18 20:09:51 jimz
|
||||
* bit of cleanup to compile cleanly in kernel, once again
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1995/12/01 15:16:26 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CONFIGURE_H_
|
||||
#define _RF__RF_CONFIGURE_H_
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/proc.h>
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
/* the raidframe configuration, passed down through an ioctl.
|
||||
* the driver can be reconfigured (with total loss of data) at any time,
|
||||
* but it must be shut down first.
|
||||
*/
|
||||
struct RF_Config_s {
|
||||
RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, and spare disks */
|
||||
dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks comprising array */
|
||||
char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names */
|
||||
dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare disks */
|
||||
char spare_names[RF_MAXSPARE][50]; /* device names */
|
||||
RF_SectorNum_t sectPerSU; /* sectors per stripe unit */
|
||||
RF_StripeNum_t SUsPerPU; /* stripe units per parity unit */
|
||||
RF_StripeNum_t SUsPerRU; /* stripe units per reconstruction unit */
|
||||
RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to be used */
|
||||
RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, not used in kernel */
|
||||
char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a disk. not used in kernel. */
|
||||
char debugVars[RF_MAXDBGV][50]; /* space for specifying debug variables & their values */
|
||||
unsigned int layoutSpecificSize; /* size in bytes of layout-specific info */
|
||||
void *layoutSpecific; /* a pointer to a layout-specific structure to be copied in */
|
||||
};
|
||||
|
||||
/* Configuration helpers declared only when KERNEL is not defined. */
#ifndef KERNEL
|
||||
int rf_MakeConfig(char *configname, RF_Config_t *cfgPtr);
|
||||
int rf_MakeLayoutSpecificNULL(FILE *fp, RF_Config_t *cfgPtr, void *arg);
|
||||
int rf_MakeLayoutSpecificDeclustered(FILE *configfp, RF_Config_t *cfgPtr, void *arg);
|
||||
void *rf_ReadSpareTable(RF_SparetWait_t *req, char *fname);
|
||||
#endif /* !KERNEL */
|
||||
|
||||
#endif /* !_RF__RF_CONFIGURE_H_ */
|
|
@ -0,0 +1,574 @@
|
|||
/* $NetBSD: rf_copyback.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* copyback.c -- code to copy reconstructed data back from spare space to
|
||||
* the replaced disk.
|
||||
*
|
||||
* the code operates using callbacks on the I/Os to continue with the next
|
||||
* unit to be copied back. We do this because a simple loop containing blocking I/Os
|
||||
* will not work in the simulator.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_copyback.c,v
|
||||
* Revision 1.26 1996/08/06 22:26:00 jimz
|
||||
* don't include sys/buf.h on linux
|
||||
*
|
||||
* Revision 1.25 1996/07/30 03:30:40 jimz
|
||||
* include rf_types.h first
|
||||
*
|
||||
* Revision 1.24 1996/07/27 18:39:52 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.23 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.22 1996/07/11 19:08:00 jimz
|
||||
* generalize reconstruction mechanism
|
||||
* allow raid1 reconstructs via copyback (done with array
|
||||
* quiesced, not online, therefore not disk-directed)
|
||||
*
|
||||
* Revision 1.21 1996/07/11 16:03:47 jimz
|
||||
* fixed hanging bug in rf_CopybackWriteDoneProc()
|
||||
*
|
||||
* Revision 1.20 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.19 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.18 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.17 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.16 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.15 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.14 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.13 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.12 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.11 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.10 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.9 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.8 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.7 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.6 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.5 1995/12/01 15:15:31 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.4 1995/06/23 13:41:36 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include <sys/time.h>
|
||||
#ifndef LINUX
|
||||
#include <sys/buf.h>
|
||||
#endif /* !LINUX */
|
||||
#include "rf_raid.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_mcpair.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_copyback.h"
|
||||
#if !defined(__NetBSD__)
|
||||
#include "rf_camlayer.h"
|
||||
#endif
|
||||
#include "rf_decluster.h"
|
||||
#include "rf_driver.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
/* Values passed as the `typ' argument of rf_CopybackOne(): which kind of
 * unit (data or parity) is being copied back. */
#define RF_COPYBACK_DATA 0
|
||||
#define RF_COPYBACK_PARITY 1
|
||||
|
||||
/* Set to 1 while a copyback is running and cleared again when it completes
 * (and at configuration time); marked "debug only" where it is set. */
int rf_copyback_in_progress;
|
||||
|
||||
/* Forward declarations of the file-local copyback routines. The two
 * *DoneProc functions are installed as disk-queue completion callbacks. */
static int rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status);
|
||||
static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status);
|
||||
static void rf_CopybackOne(RF_CopybackDesc_t *desc, int typ,
|
||||
RF_RaidAddr_t addr, RF_RowCol_t testRow, RF_RowCol_t testCol,
|
||||
RF_SectorNum_t testOffs);
|
||||
static void rf_CopybackComplete(RF_CopybackDesc_t *desc, int status);
|
||||
|
||||
int rf_ConfigureCopyback(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
rf_copyback_in_progress = 0;
|
||||
return(0);
|
||||
}
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
int raidlookup __P((char *, struct proc *, struct vnode **));
|
||||
#endif
|
||||
|
||||
/* do a complete copyback */
|
||||
void rf_CopybackReconstructedData(raidPtr)
|
||||
RF_Raid_t *raidPtr;
|
||||
{
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
int done,retcode;
|
||||
RF_CopybackDesc_t *desc;
|
||||
RF_RowCol_t frow, fcol;
|
||||
RF_RaidDisk_t *badDisk;
|
||||
char *databuf;
|
||||
|
||||
struct partinfo dpart;
|
||||
struct vnode *vp;
|
||||
struct vattr va;
|
||||
struct proc *proc;
|
||||
|
||||
#else
|
||||
int bus, targ, lun, done, retcode;
|
||||
RF_CopybackDesc_t *desc;
|
||||
RF_RowCol_t frow, fcol;
|
||||
RF_RaidDisk_t *badDisk;
|
||||
RF_DiskOp_t *tur_op;
|
||||
char *databuf;
|
||||
#endif
|
||||
|
||||
done = 0;
|
||||
fcol = 0;
|
||||
for (frow=0; frow<raidPtr->numRow; frow++) {
|
||||
for (fcol=0; fcol<raidPtr->numCol; fcol++) {
|
||||
if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared
|
||||
|| raidPtr->Disks[frow][fcol].status == rf_ds_spared)
|
||||
{
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
}
|
||||
|
||||
if (frow == raidPtr->numRow) {
|
||||
printf("COPYBACK: no disks need copyback\n");
|
||||
return;
|
||||
}
|
||||
|
||||
badDisk = &raidPtr->Disks[frow][fcol];
|
||||
#ifndef SIMULATE
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
|
||||
proc = raidPtr->proc; /* XXX Yes, this is not nice.. */
|
||||
|
||||
#if 0
|
||||
printf("Pretending the disk is happy...\n");
|
||||
retcode = 0; /* XXX this should be set to something more realistic. */
|
||||
#endif
|
||||
|
||||
/* This device may have been opened successfully the first time.
|
||||
Close it before trying to open it again.. */
|
||||
|
||||
if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
|
||||
printf("Closed the open device: %s\n",
|
||||
raidPtr->Disks[frow][fcol].devname);
|
||||
(void)vn_close(raidPtr->raid_cinfo[frow][fcol].ci_vp,
|
||||
FREAD|FWRITE, proc->p_ucred, proc);
|
||||
}
|
||||
|
||||
printf("About to (re-)open the device: %s\n",
|
||||
raidPtr->Disks[frow][fcol].devname);
|
||||
|
||||
retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);
|
||||
|
||||
if (retcode) {
|
||||
printf("COPYBACK: raidlookup on device: %s failed: %d!\n",
|
||||
raidPtr->Disks[frow][fcol].devname, retcode);
|
||||
|
||||
/* XXX the component isn't responding properly...
|
||||
must be still dead :-( */
|
||||
return;
|
||||
|
||||
} else {
|
||||
|
||||
/* Ok, so we can at least do a lookup... How about actually
|
||||
getting a vp for it? */
|
||||
|
||||
if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
|
||||
FREAD, proc->p_ucred, proc);
|
||||
if (retcode) {
|
||||
return;
|
||||
}
|
||||
raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;
|
||||
|
||||
raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size -
|
||||
rf_protectedSectors;
|
||||
|
||||
raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
|
||||
raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;
|
||||
|
||||
raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */
|
||||
|
||||
/* we allow the user to specify that only a fraction of the
|
||||
* disks should be used this is just for debug: it speeds up
|
||||
* the parity scan
|
||||
*/
|
||||
raidPtr->Disks[frow][fcol].numBlocks =
|
||||
raidPtr->Disks[frow][fcol].numBlocks *
|
||||
rf_sizePercentage / 100;
|
||||
}
|
||||
#else
|
||||
if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
|
||||
printf("COPYBACK: unable to extract bus, target, lun from devname %s\n",
|
||||
badDisk->devname);
|
||||
return;
|
||||
}
|
||||
|
||||
/* TUR the disk that's marked as bad to be sure that it's actually alive */
|
||||
rf_SCSI_AllocTUR(&tur_op);
|
||||
retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
|
||||
rf_SCSI_FreeDiskOp(tur_op, 0);
|
||||
#endif
|
||||
|
||||
if (retcode) {
|
||||
printf("COPYBACK: target disk failed TUR\n");
|
||||
return;
|
||||
}
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
/* get a buffer to hold one SU */
|
||||
RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
|
||||
|
||||
/* create a descriptor */
|
||||
RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
|
||||
desc->raidPtr = raidPtr;
|
||||
desc->status = 0;
|
||||
desc->frow = frow;
|
||||
desc->fcol = fcol;
|
||||
desc->spRow = badDisk->spareRow;
|
||||
desc->spCol = badDisk->spareCol;
|
||||
desc->stripeAddr = 0;
|
||||
desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
|
||||
desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
|
||||
desc->databuf = databuf;
|
||||
#ifndef SIMULATE
|
||||
desc->mcpair = rf_AllocMCPair();
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
printf("COPYBACK: Quiescing the array\n");
|
||||
/* quiesce the array, since we don't want to code support for user accs here */
|
||||
rf_SuspendNewRequestsAndWait(raidPtr);
|
||||
|
||||
/* adjust state of the array and of the disks */
|
||||
RF_LOCK_MUTEX(raidPtr->mutex);
|
||||
raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
|
||||
raidPtr->status[desc->frow] = rf_rs_optimal;
|
||||
rf_copyback_in_progress = 1; /* debug only */
|
||||
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
||||
|
||||
printf("COPYBACK: Beginning\n");
|
||||
RF_GETTIME(desc->starttime);
|
||||
rf_ContinueCopyback(desc);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* invoked via callback after a copyback I/O has completed to
|
||||
* continue on with the next one
|
||||
*/
|
||||
void rf_ContinueCopyback(desc)
|
||||
RF_CopybackDesc_t *desc;
|
||||
{
|
||||
RF_SectorNum_t testOffs, stripeAddr;
|
||||
RF_Raid_t *raidPtr = desc->raidPtr;
|
||||
RF_RaidAddr_t addr;
|
||||
RF_RowCol_t testRow, testCol;
|
||||
int old_pctg, new_pctg, done;
|
||||
struct timeval t, diff;
|
||||
|
||||
old_pctg = (-1);
|
||||
while (1) {
|
||||
stripeAddr = desc->stripeAddr;
|
||||
if (rf_prReconSched) {
|
||||
old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
|
||||
}
|
||||
desc->stripeAddr += desc->sectPerStripe;
|
||||
if (rf_prReconSched) {
|
||||
new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
|
||||
if (new_pctg != old_pctg) {
|
||||
RF_GETTIME(t);
|
||||
RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
|
||||
printf("%d %d.%06d\n",new_pctg, (int)diff.tv_sec, (int)diff.tv_usec);
|
||||
}
|
||||
}
|
||||
|
||||
if (stripeAddr >= raidPtr->totalSectors) {
|
||||
rf_CopybackComplete(desc, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* walk through the current stripe, su-by-su */
|
||||
for (done=0, addr = stripeAddr; addr < stripeAddr+desc->sectPerStripe; addr += desc->sectPerSU) {
|
||||
|
||||
/* map the SU, disallowing remap to spare space */
|
||||
(raidPtr->Layout.map->MapSector)(raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
|
||||
|
||||
if (testRow == desc->frow && testCol == desc->fcol) {
|
||||
rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs);
|
||||
#ifdef SIMULATE
|
||||
return;
|
||||
#else /* SIMULATE */
|
||||
done = 1;
|
||||
break;
|
||||
#endif /* SIMULATE */
|
||||
}
|
||||
}
|
||||
|
||||
if (!done) {
|
||||
/* we didn't find the failed disk in the data part. check parity. */
|
||||
|
||||
/* map the parity for this stripe, disallowing remap to spare space */
|
||||
(raidPtr->Layout.map->MapParity)(raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
|
||||
|
||||
if (testRow == desc->frow && testCol == desc->fcol) {
|
||||
rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs);
|
||||
#ifdef SIMULATE
|
||||
return;
|
||||
#endif /* SIMULATE */
|
||||
}
|
||||
}
|
||||
|
||||
/* check to see if the last read/write pair failed */
|
||||
if (desc->status) {
|
||||
rf_CopybackComplete(desc, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* we didn't find any units to copy back in this stripe. Continue with the next one */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* copyback one unit */
|
||||
static void rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs)
|
||||
RF_CopybackDesc_t *desc;
|
||||
int typ;
|
||||
RF_RaidAddr_t addr;
|
||||
RF_RowCol_t testRow;
|
||||
RF_RowCol_t testCol;
|
||||
RF_SectorNum_t testOffs;
|
||||
{
|
||||
RF_SectorCount_t sectPerSU = desc->sectPerSU;
|
||||
RF_Raid_t *raidPtr = desc->raidPtr;
|
||||
RF_RowCol_t spRow = desc->spRow;
|
||||
RF_RowCol_t spCol = desc->spCol;
|
||||
RF_SectorNum_t spOffs;
|
||||
|
||||
/* find the spare spare location for this SU */
|
||||
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
if (typ == RF_COPYBACK_DATA)
|
||||
raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
|
||||
else
|
||||
raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
|
||||
} else {
|
||||
spOffs = testOffs;
|
||||
}
|
||||
|
||||
/* create reqs to read the old location & write the new */
|
||||
desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
|
||||
sectPerSU, desc->databuf, 0L, 0,
|
||||
(int (*)(void *,int)) rf_CopybackReadDoneProc, desc,
|
||||
NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
|
||||
desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
|
||||
sectPerSU, desc->databuf, 0L, 0,
|
||||
(int (*)(void *,int)) rf_CopybackWriteDoneProc, desc,
|
||||
NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
|
||||
desc->frow = testRow;
|
||||
desc->fcol = testCol;
|
||||
|
||||
/* enqueue the read. the write will go out as part of the callback on the read.
|
||||
* at user-level & in the kernel, wait for the read-write pair to complete.
|
||||
* in the simulator, just return, since everything will happen as callbacks
|
||||
*/
|
||||
#ifndef SIMULATE
|
||||
RF_LOCK_MUTEX(desc->mcpair->mutex);
|
||||
desc->mcpair->flag = 0;
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
|
||||
|
||||
#ifndef SIMULATE
|
||||
while (!desc->mcpair->flag) {
|
||||
RF_WAIT_MCPAIR(desc->mcpair);
|
||||
}
|
||||
RF_UNLOCK_MUTEX(desc->mcpair->mutex);
|
||||
rf_FreeDiskQueueData(desc->readreq);
|
||||
rf_FreeDiskQueueData(desc->writereq);
|
||||
#endif /* !SIMULATE */
|
||||
}
|
||||
|
||||
|
||||
/* called at interrupt context when the read has completed. just send out the write */
|
||||
static int rf_CopybackReadDoneProc(desc, status)
|
||||
RF_CopybackDesc_t *desc;
|
||||
int status;
|
||||
{
|
||||
if (status) { /* invoke the callback with bad status */
|
||||
printf("COPYBACK: copyback read failed. Aborting.\n");
|
||||
(desc->writereq->CompleteFunc)(desc, -100);
|
||||
}
|
||||
else {
|
||||
rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* called at interrupt context when the write has completed.
|
||||
* at user level & in the kernel, wake up the copyback thread.
|
||||
* in the simulator, invoke the next copyback directly.
|
||||
* can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
|
||||
*/
|
||||
static int rf_CopybackWriteDoneProc(desc, status)
|
||||
RF_CopybackDesc_t *desc;
|
||||
int status;
|
||||
{
|
||||
if (status && status != -100) {
|
||||
printf("COPYBACK: copyback write failed. Aborting.\n");
|
||||
}
|
||||
|
||||
#ifdef SIMULATE
|
||||
rf_FreeDiskQueueData(desc->readreq);
|
||||
rf_FreeDiskQueueData(desc->writereq);
|
||||
if (!status)
|
||||
rf_ContinueCopyback(desc);
|
||||
else
|
||||
rf_CopybackComplete(desc, 1);
|
||||
#else /* SIMULATE */
|
||||
desc->status = status;
|
||||
rf_MCPairWakeupFunc(desc->mcpair);
|
||||
#endif /* SIMULATE */
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* invoked when the copyback has completed */
|
||||
static void rf_CopybackComplete(desc, status)
|
||||
RF_CopybackDesc_t *desc;
|
||||
int status;
|
||||
{
|
||||
RF_Raid_t *raidPtr = desc->raidPtr;
|
||||
struct timeval t, diff;
|
||||
|
||||
if (!status) {
|
||||
RF_LOCK_MUTEX(raidPtr->mutex);
|
||||
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
|
||||
rf_FreeSpareTable(raidPtr);
|
||||
} else {
|
||||
raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare;
|
||||
}
|
||||
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
||||
|
||||
RF_GETTIME(t);
|
||||
RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
|
||||
printf("Copyback time was %d.%06d seconds\n",
|
||||
(int)diff.tv_sec, (int)diff.tv_usec);
|
||||
} else printf("COPYBACK: Failure.\n");
|
||||
|
||||
RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
|
||||
#ifndef SIMULATE
|
||||
rf_FreeMCPair(desc->mcpair);
|
||||
#endif /* !SIMULATE */
|
||||
RF_Free(desc, sizeof(*desc));
|
||||
|
||||
rf_copyback_in_progress = 0;
|
||||
rf_ResumeNewRequests(raidPtr);
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/* $NetBSD: rf_copyback.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* rf_copyback.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_copyback.h,v
|
||||
* Revision 1.5 1996/07/11 19:08:00 jimz
|
||||
* generalize reconstruction mechanism
|
||||
* allow raid1 reconstructs via copyback (done with array
|
||||
* quiesced, not online, therefore not disk-directed)
|
||||
*
|
||||
* Revision 1.4 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.3 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:55:02 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_COPYBACK_H_
|
||||
#define _RF__RF_COPYBACK_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
typedef struct RF_CopybackDesc_s {
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RowCol_t frow;
|
||||
RF_RowCol_t fcol;
|
||||
RF_RowCol_t spRow;
|
||||
RF_RowCol_t spCol;
|
||||
int status;
|
||||
RF_StripeNum_t stripeAddr;
|
||||
RF_SectorCount_t sectPerSU;
|
||||
RF_SectorCount_t sectPerStripe;
|
||||
char *databuf;
|
||||
RF_DiskQueueData_t *readreq;
|
||||
RF_DiskQueueData_t *writereq;
|
||||
struct timeval starttime;
|
||||
#ifndef SIMULATE
|
||||
RF_MCPair_t *mcpair;
|
||||
#endif /* !SIMULATE */
|
||||
} RF_CopybackDesc_t;
|
||||
|
||||
extern int rf_copyback_in_progress;
|
||||
|
||||
int rf_ConfigureCopyback(RF_ShutdownList_t **listp);
|
||||
void rf_CopybackReconstructedData(RF_Raid_t *raidPtr);
|
||||
void rf_ContinueCopyback(RF_CopybackDesc_t *desc);
|
||||
|
||||
#endif /* !_RF__RF_COPYBACK_H_ */
|
|
@ -0,0 +1,194 @@
|
|||
/* $NetBSD: rf_cpuutil.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Authors: Mark Holland, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* rf_cpuutil.c
|
||||
*
|
||||
* track cpu utilization
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#include "rf_cpuutil.h"
|
||||
|
||||
#ifndef KERNEL
|
||||
#include <errno.h>
|
||||
#endif /* !KERNEL */
|
||||
#include "rf_types.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
#ifdef __osf__
|
||||
#include <sys/table.h>
|
||||
#endif /* __osf__ */
|
||||
#ifdef AIX
|
||||
#include <nlist.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#endif /* AIX */
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <sys/dk.h>
|
||||
#endif /* __NetBSD__ */
|
||||
#else /* KERNEL */
|
||||
extern int table(int id, int index, void *addr, int nel, u_int lel);
|
||||
#endif /* KERNEL */
|
||||
|
||||
#ifdef __osf__
/* sysinfo snapshots taken by rf_start_cpu_monitor / rf_stop_cpu_monitor */
static struct tbl_sysinfo start;
static struct tbl_sysinfo stop;
#endif /* __osf__ */

#ifdef AIX
/* descriptor for /dev/kmem, opened by rf_ConfigureCpuMonitor */
static int kmem_fd;
/* offset of the kernel "sysinfo" symbol, as reported by knlist() */
static off_t sysinfo_offset;
/* sysinfo snapshots read from /dev/kmem at start and stop */
static struct sysinfo sysinfo_start;
static struct sysinfo sysinfo_stop;
/* symbol list handed to knlist(); terminated by an empty name */
static struct nlist namelist[] = {
  {{"sysinfo"}},
  {{""}},
};
#endif /* AIX */
|
||||
|
||||
#ifdef AIX
|
||||
static void rf_ShutdownCpuMonitor(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
close(kmem_fd);
|
||||
}
|
||||
#endif /* AIX */
|
||||
|
||||
int rf_ConfigureCpuMonitor(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
#ifdef AIX
|
||||
int rc;
|
||||
|
||||
rc = knlist(namelist, 1, sizeof(struct nlist));
|
||||
if (rc) {
|
||||
RF_ERRORMSG("Could not knlist() to config CPU monitor\n");
|
||||
return(errno);
|
||||
}
|
||||
if (namelist[0].n_value == 0) {
|
||||
RF_ERRORMSG("Got bogus results from knlist() for CPU monitor\n");
|
||||
return(EIO);
|
||||
}
|
||||
sysinfo_offset = namelist[0].n_value;
|
||||
kmem_fd = open("/dev/kmem", O_RDONLY);
|
||||
if (kmem_fd < 0) {
|
||||
perror("/dev/kmem");
|
||||
return(errno);
|
||||
}
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownCpuMonitor, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
rf_ShutdownCpuMonitor(NULL);
|
||||
return(rc);
|
||||
}
|
||||
#endif /* AIX */
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * Record the starting CPU-time snapshot for a later utilization report.
 *
 * OSF user level: samples sysinfo through table().
 * OSF kernel:     sampling is not implemented (see the note below).
 * AIX:            reads struct sysinfo from /dev/kmem.
 * Elsewhere:      does nothing.
 *
 * Failures are reported but not returned to the caller.
 */
void rf_start_cpu_monitor(void)
{
#ifdef __osf__
#ifndef KERNEL
  if (table(TBL_SYSINFO, 0, &start, 1, sizeof(start)) != 1) {
    printf("Unable to get sysinfo for cpu utilization monitor\n");
    perror("start_cpu_monitor");
  }
#else /* KERNEL */
  /*
   * Kernel sampling was never enabled; the intended code was:
   *   start.si_user = cp_time[CP_USER];
   *   start.si_nice = cp_time[CP_NICE];
   *   start.si_sys  = cp_time[CP_SYS];
   *   start.si_idle = cp_time[CP_IDLE];
   *   start.wait    = cp_time[CP_WAIT];
   */
#endif /* !KERNEL */
#endif /* __osf__ */
#ifdef AIX
  off_t off;
  int rc;

  off = lseek(kmem_fd, sysinfo_offset, SEEK_SET);
  RF_ASSERT(off == sysinfo_offset);
  rc = read(kmem_fd, &sysinfo_start, sizeof(struct sysinfo));
  if (rc != (int)sizeof(struct sysinfo)) {
    /* sizeof yields size_t; cast so the argument matches %d */
    RF_ERRORMSG2("Starting CPU monitor: rc=%d != %d\n", rc,
      (int)sizeof(struct sysinfo));
  }
#endif /* AIX */
}
|
||||
|
||||
/*
 * Record the ending CPU-time snapshot; counterpart of
 * rf_start_cpu_monitor.
 *
 * OSF user level: samples sysinfo through table().
 * OSF kernel:     sampling is not implemented (see the note below).
 * AIX:            reads struct sysinfo from /dev/kmem.
 * Elsewhere:      does nothing.
 *
 * Failures are reported but not returned to the caller.
 */
void rf_stop_cpu_monitor(void)
{
#ifdef __osf__
#ifndef KERNEL
  if (table(TBL_SYSINFO, 0, &stop, 1, sizeof(stop)) != 1) {
    printf("Unable to get sysinfo for cpu utilization monitor\n");
    perror("stop_cpu_monitor");
  }
#else /* KERNEL */
  /*
   * Kernel sampling was never enabled; the intended code was:
   *   stop.si_user = cp_time[CP_USER];
   *   stop.si_nice = cp_time[CP_NICE];
   *   stop.si_sys  = cp_time[CP_SYS];
   *   stop.si_idle = cp_time[CP_IDLE];
   *   stop.wait    = cp_time[CP_WAIT];
   */
#endif /* !KERNEL */
#endif /* __osf__ */
#ifdef AIX
  off_t off;
  int rc;

  off = lseek(kmem_fd, sysinfo_offset, SEEK_SET);
  RF_ASSERT(off == sysinfo_offset);
  rc = read(kmem_fd, &sysinfo_stop, sizeof(struct sysinfo));
  if (rc != (int)sizeof(struct sysinfo)) {
    /* sizeof yields size_t; cast so the argument matches %d */
    RF_ERRORMSG2("Stopping CPU monitor: rc=%d != %d\n", rc,
      (int)sizeof(struct sysinfo));
  }
#endif /* AIX */
}
|
||||
|
||||
/*
 * Print CPU usage between the last start and stop snapshots.
 *
 * s: label naming the measured phase; it is inserted into the message.
 *
 * OSF:  prints the busy percentage, or a notice when no ticks were
 *       recorded (for example in the kernel build, where the snapshots
 *       are never filled in).
 * AIX:  prints the raw number of idle ticks.
 * Elsewhere: prints nothing.
 */
void rf_print_cpu_util(char *s)
{
#ifdef __osf__
  long totalticks, idleticks;

  idleticks = stop.si_idle - start.si_idle + stop.wait - start.wait;
  totalticks = stop.si_user - start.si_user + stop.si_nice - start.si_nice +
    stop.si_sys - start.si_sys + idleticks;
  if (totalticks <= 0) {
    /* nothing was sampled: avoid dividing by zero */
    printf("CPU utilization during %s is unavailable\n", s);
  } else {
    /* the expression has type long, so it must be printed with %ld */
    printf("CPU utilization during %s was %ld %%\n", s, 100 - 100*idleticks/totalticks);
  }
#endif /* __osf__ */
#ifdef AIX
  long idle;

  /* XXX compute a percentage here */
  idle = (long)(sysinfo_stop.cpu[CPU_IDLE] - sysinfo_start.cpu[CPU_IDLE]);
  printf("%ld idle ticks during %s.\n", idle, s);
#endif /* AIX */
}
|
|
@ -0,0 +1,56 @@
|
|||
/* $NetBSD: rf_cpuutil.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* rf_cpuutil.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_cpuutil.h,v
|
||||
* Revision 1.3 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.2 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:55:29 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CPUUTIL_H_
|
||||
#define _RF__RF_CPUUTIL_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/* one-time setup; returns 0 on success, nonzero on failure */
int  rf_ConfigureCpuMonitor(RF_ShutdownList_t **listp);

/* take the starting / ending CPU-time snapshot */
void rf_start_cpu_monitor(void);
void rf_stop_cpu_monitor(void);

/* report CPU usage between the two snapshots; s labels the output */
void rf_print_cpu_util(char *s);
|
||||
|
||||
#endif /* !_RF__RF_CPUUTIL_H_ */
|
|
@ -0,0 +1,449 @@
|
|||
/* $NetBSD: rf_cvscan.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
* cvscan.c -- prioritized cvscan disk queueing code.
|
||||
*
|
||||
* Nov 9, 1994, adapted from raidSim version (MCH)
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_cvscan.c,v
|
||||
* Revision 1.6 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.5 1996/07/15 17:22:18 jimz
|
||||
* nit-pick code cleanup
|
||||
* resolve stdlib problems on DEC OSF
|
||||
*
|
||||
* Revision 1.4 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.3 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.2 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.1 1996/06/05 19:17:40 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_stripelocks.h"
|
||||
#include "rf_layout.h"
|
||||
#include "rf_diskqueue.h"
|
||||
#include "rf_cvscan.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
/* run the queue consistency checks, tagging them with the call site */
#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__)

/* true when p is one of the two priorities this queue accepts */
#define pri_ok(p) \
  ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY) )
|
||||
|
||||
/*
 * Assert the structural invariants of a CVSCAN queue:
 *   - left list:  every offset is below cur_block at the head and
 *     non-increasing along the list, all entries at nxt_priority,
 *     length equal to left_cnt;
 *   - right list: offsets start at or above cur_block and are
 *     non-decreasing, all entries at nxt_priority, length equal to
 *     right_cnt;
 *   - burner list: priorities are valid, non-increasing and strictly
 *     below nxt_priority.
 * file and line identify the caller; they are not used by the checks.
 */
static void CheckCvscanState(RF_CvscanHeader_t *hdr, char *file, int line)
{
  RF_DiskQueueData_t *req;
  long count, bound;

  /* left side */
  if (hdr->left != (RF_DiskQueueData_t *) NULL) {
    RF_ASSERT( hdr->left->sectorOffset < hdr->cur_block );
  }
  bound = hdr->cur_block;
  count = 0;
  req = hdr->left;
  while (req != (RF_DiskQueueData_t *) NULL) {
    RF_ASSERT( req->sectorOffset <= bound
      && req->priority == hdr->nxt_priority && pri_ok(req->priority) );
    bound = req->sectorOffset;
    count++;
    req = req->next;
  }
  RF_ASSERT( count == hdr->left_cnt );

  /* right side */
  bound = hdr->cur_block;
  count = 0;
  req = hdr->right;
  while (req != (RF_DiskQueueData_t *) NULL) {
    RF_ASSERT(bound <= req->sectorOffset);
    RF_ASSERT(req->priority == hdr->nxt_priority);
    RF_ASSERT(pri_ok(req->priority));
    bound = req->sectorOffset;
    count++;
    req = req->next;
  }
  RF_ASSERT( count == hdr->right_cnt );

  /* back burner: bound tracks the previous entry's priority */
  bound = hdr->nxt_priority - 1;
  req = hdr->burner;
  while (req != (RF_DiskQueueData_t *) NULL) {
    RF_ASSERT(req);
    RF_ASSERT(hdr);
    RF_ASSERT(pri_ok(req->priority));
    RF_ASSERT(bound >= req->priority);
    RF_ASSERT(req->priority < hdr->nxt_priority);
    bound = req->priority;
    req = req->next;
  }
}
|
||||
|
||||
|
||||
|
||||
/*
 * Insert req into the list whose head pointer is *list_ptr, keeping the
 * list ordered by decreasing priority.  req is placed in front of the
 * first entry whose priority is not greater than its own.
 */
static void PriorityInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req )
{
  RF_DiskQueueData_t **link = list_ptr;

  /* skip every entry that outranks req */
  while (*link != (RF_DiskQueueData_t *) NULL
         && (*link)->priority > req->priority) {
    link = &(*link)->next;
  }

  req->next = *link;
  *link = req;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Insert req into the list whose head pointer is *list_ptr, ordered by
 * sector offset.
 *   rf_cvscan_RIGHT: ascending offsets; req goes after entries with an
 *                    equal offset.
 *   rf_cvscan_LEFT:  descending offsets; req goes before entries with an
 *                    equal offset.
 */
static void ReqInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req, RF_CvscanArmDir_t order)
{
  RF_DiskQueueData_t **link = list_ptr;

  for (;;) {
    RF_DiskQueueData_t *cur = *link;
    int keep_going;

    if (cur == (RF_DiskQueueData_t *) NULL)
      break;

    if (order == rf_cvscan_RIGHT)
      keep_going = (cur->sectorOffset <= req->sectorOffset);
    else if (order == rf_cvscan_LEFT)
      keep_going = (cur->sectorOffset > req->sectorOffset);
    else
      keep_going = 0;

    if (!keep_going)
      break;
    link = &cur->next;
  }

  req->next = *link;
  *link = req;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Unlink and return the first entry of the list whose head pointer is
 * *list_ptr, or NULL when the list is empty.  The returned entry's next
 * pointer is left as it was.
 */
static RF_DiskQueueData_t *ReqDequeue(RF_DiskQueueData_t **list_ptr)
{
  RF_DiskQueueData_t *head = *list_ptr;

  if (head == (RF_DiskQueueData_t *) NULL)
    return( head );

  *list_ptr = head->next;
  return( head );
}
|
||||
|
||||
|
||||
|
||||
/*
 * After cur_block has moved, shift every request at the front of the
 * right list that now lies below cur_block over to the left list,
 * keeping both counters in step.
 */
static void ReBalance(RF_CvscanHeader_t *hdr)
{
  for (;;) {
    RF_DiskQueueData_t *moved;

    if (hdr->right == (RF_DiskQueueData_t *) NULL)
      break;
    if (hdr->right->sectorOffset >= hdr->cur_block)
      break;

    hdr->right_cnt--;
    hdr->left_cnt++;
    moved = ReqDequeue( &hdr->right );
    ReqInsert( &hdr->left, moved, rf_cvscan_LEFT );
  }
}
|
||||
|
||||
|
||||
|
||||
/*
 * Move every entry of the source list into the destination list in
 * priority order, then leave the source list empty.
 */
static void Transfer(RF_DiskQueueData_t **to_list_ptr, RF_DiskQueueData_t **from_list_ptr )
{
  RF_DiskQueueData_t *cur = *from_list_ptr;

  while (cur != (RF_DiskQueueData_t *) NULL) {
    /* PriorityInsert overwrites cur->next, so fetch the successor first */
    RF_DiskQueueData_t *following = cur->next;

    PriorityInsert( to_list_ptr, cur );
    cur = following;
  }
  *from_list_ptr = (RF_DiskQueueData_t *) NULL;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Add req to the queue.
 *
 *  - With both active lists empty, req's priority becomes the active one.
 *  - If req outranks the active priority, everything on the active lists
 *    is pushed to the back burner and req's priority becomes active.
 *  - If req ranks below the active priority it goes to the back burner.
 *  - Otherwise it is inserted on the left or right list depending on
 *    which side of cur_block it falls.
 */
static void RealEnqueue(RF_CvscanHeader_t *hdr, RF_DiskQueueData_t *req)
{
  RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY);

  DO_CHECK_STATE(hdr);

  if( hdr->left_cnt == 0 && hdr->right_cnt == 0 )
    hdr->nxt_priority = req->priority;

  if( req->priority > hdr->nxt_priority ) {
    /* demote all outstanding requests to the back burner */
    Transfer( &hdr->burner, &hdr->left );
    Transfer( &hdr->burner, &hdr->right );
    hdr->right_cnt = 0;
    hdr->left_cnt = 0;
    hdr->nxt_priority = req->priority;
  }

  if( req->priority < hdr->nxt_priority ) {
    /* lower priority than what is being served: park it */
    PriorityInsert( &hdr->burner, req );
  } else if( req->sectorOffset < hdr->cur_block ) {
    /* behind the current arm position */
    hdr->left_cnt++;
    ReqInsert( &hdr->left, req, rf_cvscan_LEFT );
  } else {
    /* at or ahead of the current arm position */
    hdr->right_cnt++;
    ReqInsert( &hdr->right, req, rf_cvscan_RIGHT );
  }

  DO_CHECK_STATE(hdr);
}
|
||||
|
||||
|
||||
|
||||
/*
 * Queue-interface entry point for adding a request.  q_in is the
 * RF_CvscanHeader_t for the queue.  The priority argument is not
 * consulted; placement is decided from elem->priority.
 */
void rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t *elem, int priority)
{
  RealEnqueue( (RF_CvscanHeader_t *) q_in, elem );
}
|
||||
|
||||
|
||||
|
||||
RF_DiskQueueData_t *rf_CvscanDequeue(void *q_in)
|
||||
{
|
||||
RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
|
||||
long range, i, sum_dist_left, sum_dist_right;
|
||||
RF_DiskQueueData_t *ret;
|
||||
RF_DiskQueueData_t *tmp;
|
||||
|
||||
DO_CHECK_STATE(hdr);
|
||||
|
||||
if( hdr->left_cnt == 0 && hdr->right_cnt == 0 ) return( (RF_DiskQueueData_t *) NULL );
|
||||
|
||||
range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt));
|
||||
for( i=0, tmp=hdr->left, sum_dist_left=
|
||||
((hdr->direction==rf_cvscan_RIGHT)?range*hdr->change_penalty:0);
|
||||
tmp != (RF_DiskQueueData_t *) NULL && i < range;
|
||||
tmp = tmp->next, i++ ) {
|
||||
sum_dist_left += hdr->cur_block - tmp->sectorOffset;
|
||||
}
|
||||
for( i=0, tmp=hdr->right, sum_dist_right=
|
||||
((hdr->direction==rf_cvscan_LEFT)?range*hdr->change_penalty:0);
|
||||
tmp != (RF_DiskQueueData_t *) NULL && i < range;
|
||||
tmp = tmp->next, i++ ) {
|
||||
sum_dist_right += tmp->sectorOffset - hdr->cur_block;
|
||||
}
|
||||
|
||||
if( hdr->right_cnt == 0 || sum_dist_left < sum_dist_right ) {
|
||||
hdr->direction = rf_cvscan_LEFT;
|
||||
hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector;
|
||||
hdr->left_cnt = RF_MAX(hdr->left_cnt-1,0);
|
||||
tmp = hdr->left;
|
||||
ret = (ReqDequeue(&hdr->left))/*->parent*/;
|
||||
} else {
|
||||
hdr->direction = rf_cvscan_RIGHT;
|
||||
hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector;
|
||||
hdr->right_cnt = RF_MAX(hdr->right_cnt-1,0);
|
||||
tmp = hdr->right;
|
||||
ret = (ReqDequeue(&hdr->right))/*->parent*/;
|
||||
}
|
||||
ReBalance( hdr );
|
||||
|
||||
if( hdr->left_cnt == 0 && hdr->right_cnt == 0
|
||||
&& hdr->burner != (RF_DiskQueueData_t *) NULL ) {
|
||||
/*
|
||||
** restore low priority requests for next dequeue
|
||||
*/
|
||||
RF_DiskQueueData_t *burner = hdr->burner;
|
||||
hdr->nxt_priority = burner->priority;
|
||||
while( burner != (RF_DiskQueueData_t *) NULL
|
||||
&& burner->priority == hdr->nxt_priority ) {
|
||||
RF_DiskQueueData_t *next = burner->next;
|
||||
RealEnqueue( hdr, burner );
|
||||
burner = next;
|
||||
}
|
||||
hdr->burner = burner;
|
||||
}
|
||||
DO_CHECK_STATE(hdr);
|
||||
return( ret );
|
||||
}
|
||||
|
||||
|
||||
|
||||
RF_DiskQueueData_t *rf_CvscanPeek(void *q_in)
|
||||
{
|
||||
RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
|
||||
long range, i, sum_dist_left, sum_dist_right;
|
||||
RF_DiskQueueData_t *tmp, *headElement;
|
||||
|
||||
DO_CHECK_STATE(hdr);
|
||||
|
||||
if( hdr->left_cnt == 0 && hdr->right_cnt == 0 )
|
||||
headElement = NULL;
|
||||
else {
|
||||
range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt));
|
||||
for( i=0, tmp=hdr->left, sum_dist_left=
|
||||
((hdr->direction==rf_cvscan_RIGHT)?range*hdr->change_penalty:0);
|
||||
tmp != (RF_DiskQueueData_t *) NULL && i < range;
|
||||
tmp = tmp->next, i++ ) {
|
||||
sum_dist_left += hdr->cur_block - tmp->sectorOffset;
|
||||
}
|
||||
for( i=0, tmp=hdr->right, sum_dist_right=
|
||||
((hdr->direction==rf_cvscan_LEFT)?range*hdr->change_penalty:0);
|
||||
tmp != (RF_DiskQueueData_t *) NULL && i < range;
|
||||
tmp = tmp->next, i++ ) {
|
||||
sum_dist_right += tmp->sectorOffset - hdr->cur_block;
|
||||
}
|
||||
|
||||
if( hdr->right_cnt == 0 || sum_dist_left < sum_dist_right )
|
||||
headElement = hdr->left;
|
||||
else
|
||||
headElement = hdr->right;
|
||||
}
|
||||
return(headElement);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF)
|
||||
** lowest average response time
|
||||
** CVSCAN( 1, infinity ) is SCAN
|
||||
** lowest response time standard deviation
|
||||
*/
|
||||
|
||||
|
||||
/*
 * Module-level configuration hook for the CVSCAN queue.  There is
 * nothing to set up, so it always reports success (0).
 */
int rf_CvscanConfigure(void)
{
  return 0;
}
|
||||
|
||||
|
||||
|
||||
void *rf_CvscanCreate(RF_SectorCount_t sectPerDisk,
|
||||
RF_AllocListElem_t *clList,
|
||||
RF_ShutdownList_t **listp)
|
||||
{
|
||||
RF_CvscanHeader_t *hdr;
|
||||
long range = 2; /* Currently no mechanism to change these */
|
||||
long penalty = sectPerDisk / 5;
|
||||
|
||||
RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList);
|
||||
bzero((char *)hdr, sizeof(RF_CvscanHeader_t));
|
||||
hdr->range_for_avg = RF_MAX( range, 1 );
|
||||
hdr->change_penalty = RF_MAX( penalty, 0 );
|
||||
hdr->direction = rf_cvscan_RIGHT;
|
||||
hdr->cur_block = 0;
|
||||
hdr->left_cnt = hdr->right_cnt = 0;
|
||||
hdr->left = hdr->right = (RF_DiskQueueData_t *) NULL;
|
||||
hdr->burner = (RF_DiskQueueData_t *) NULL;
|
||||
DO_CHECK_STATE(hdr);
|
||||
|
||||
return( (void *) hdr );
|
||||
}
|
||||
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* PrintCvscanQueue is not used, so we ignore it... */
|
||||
#else
|
||||
static void PrintCvscanQueue(RF_CvscanHeader_t *hdr)
|
||||
{
|
||||
RF_DiskQueueData_t *tmp;
|
||||
|
||||
printf( "CVSCAN(%d,%d) at %d going %s\n",
|
||||
(int)hdr->range_for_avg,
|
||||
(int)hdr->change_penalty,
|
||||
(int)hdr->cur_block,
|
||||
(hdr->direction==rf_cvscan_LEFT)?"LEFT":"RIGHT" );
|
||||
printf( "\tLeft(%d): ", hdr->left_cnt );
|
||||
for( tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
|
||||
printf( "(%d,%ld,%d) ",
|
||||
(int) tmp->sectorOffset,
|
||||
(long) (tmp->sectorOffset + tmp->numSector),
|
||||
tmp->priority );
|
||||
printf( "\n" );
|
||||
printf( "\tRight(%d): ", hdr->right_cnt );
|
||||
for( tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
|
||||
printf( "(%d,%ld,%d) ",
|
||||
(int) tmp->sectorOffset,
|
||||
(long) (tmp->sectorOffset + tmp->numSector),
|
||||
tmp->priority );
|
||||
printf( "\n" );
|
||||
printf( "\tBurner: " );
|
||||
for( tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
|
||||
printf( "(%d,%ld,%d) ",
|
||||
(int) tmp->sectorOffset,
|
||||
(long) (tmp->sectorOffset + tmp->numSector),
|
||||
tmp->priority );
|
||||
printf( "\n" );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* promotes reconstruction accesses for the given stripeID to normal priority.
|
||||
* returns 1 if an access was found and zero otherwise. Normally, we should
|
||||
* only have one or zero entries in the burner queue, so execution time should
|
||||
* be short.
|
||||
*/
|
||||
int rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru)
|
||||
{
|
||||
RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
|
||||
RF_DiskQueueData_t *trailer, *tmp = hdr->burner, *tlist = NULL;
|
||||
int retval=0;
|
||||
|
||||
DO_CHECK_STATE(hdr);
|
||||
while (tmp) { /* handle entries at the front of the list */
|
||||
if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) {
|
||||
hdr->burner = tmp->next;
|
||||
tmp->priority = RF_IO_NORMAL_PRIORITY;
|
||||
tmp->next = tlist; tlist=tmp;
|
||||
tmp = hdr->burner;
|
||||
} else break;
|
||||
}
|
||||
if (tmp) {trailer=tmp; tmp=tmp->next;}
|
||||
while (tmp) { /* handle entries on the rest of the list */
|
||||
if (tmp->parityStripeID == parityStripeID && tmp->which_ru == which_ru) {
|
||||
trailer->next = tmp->next;
|
||||
tmp->priority = RF_IO_NORMAL_PRIORITY;
|
||||
tmp->next = tlist; tlist=tmp; /* insert on a temp queue */
|
||||
tmp = trailer->next;
|
||||
} else {
|
||||
trailer=tmp; tmp=tmp->next;
|
||||
}
|
||||
}
|
||||
while (tlist) {
|
||||
retval++;
|
||||
tmp = tlist->next;
|
||||
RealEnqueue(hdr, tlist);
|
||||
tlist = tmp;
|
||||
}
|
||||
RF_ASSERT(retval==0 || retval==1);
|
||||
DO_CHECK_STATE((RF_CvscanHeader_t *)q_in);
|
||||
return(retval);
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
/* $NetBSD: rf_cvscan.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Disk scheduling by CVSCAN( N, r )
|
||||
**
|
||||
** Given a set of requests, partition them into one set on each
|
||||
** side of the current arm position. The trick is to pick which
|
||||
** side you are going to service next; once a side is picked you will
|
||||
** service the closest request.
|
||||
** Let there be n1 requests on one side and n2 requests on the other
|
||||
** side. If one of n1 or n2 is zero, select the other side.
|
||||
** If both n1 and n2 are nonzero, select a "range" for examination
|
||||
** that is N' = min( n1, n2, N ). Average the distance from the
|
||||
** current position to the nearest N' requests on each side giving
|
||||
** d1 and d2.
|
||||
** Suppose the last decision was to move toward set 2, then the
|
||||
** current direction is toward set 2, and you will only switch to set
|
||||
** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1].
|
||||
**
|
||||
** I extend this by applying only to the set of requests that all
|
||||
** share the same, highest priority level.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_cvscan.h,v
|
||||
* Revision 1.3 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.2 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.1 1996/06/05 19:17:40 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_CVSCAN_H_
|
||||
#define _RF__RF_CVSCAN_H_
|
||||
|
||||
#include "rf_diskqueue.h"
|
||||
|
||||
/* direction the disk arm is travelling, or the side of cur_block a list covers */
typedef enum RF_CvscanArmDir_e {
  rf_cvscan_LEFT  = 0,  /* toward lower sector offsets */
  rf_cvscan_RIGHT = 1   /* toward higher sector offsets */
} RF_CvscanArmDir_t;
|
||||
|
||||
typedef struct RF_CvscanHeader_s {
|
||||
long range_for_avg; /* CVSCAN param N */
|
||||
long change_penalty; /* CVSCAN param R */
|
||||
RF_CvscanArmDir_t direction;
|
||||
RF_SectorNum_t cur_block;
|
||||
int nxt_priority;
|
||||
RF_DiskQueueData_t *left;
|
||||
int left_cnt;
|
||||
RF_DiskQueueData_t *right;
|
||||
int right_cnt;
|
||||
RF_DiskQueueData_t *burner;
|
||||
} RF_CvscanHeader_t;
|
||||
|
||||
int rf_CvscanConfigure(void);
|
||||
void *rf_CvscanCreate(RF_SectorCount_t sect_per_disk,
|
||||
RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp);
|
||||
void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t *req, int priority);
|
||||
RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr);
|
||||
RF_DiskQueueData_t *rf_CvscanPeek(void *qptr);
|
||||
int rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID,
|
||||
RF_ReconUnitNum_t which_ru);
|
||||
|
||||
#endif /* !_RF__RF_CVSCAN_H_ */
|
|
@ -0,0 +1,319 @@
|
|||
/* $NetBSD: rf_dag.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II, Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* *
|
||||
* dag.h -- header file for DAG-related data structures *
|
||||
* *
|
||||
****************************************************************************/
|
||||
/*
|
||||
*
|
||||
* :
|
||||
* Log: rf_dag.h,v
|
||||
* Revision 1.35 1996/11/05 18:38:37 jimz
|
||||
* add patch from galvarez@cs.ucsd.edu (Guillermo Alvarez)
|
||||
* to fix dag_params memory-sizing problem (should be an array
|
||||
* of the type, not an array of pointers to the type)
|
||||
*
|
||||
* Revision 1.34 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.33 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.32 1996/06/10 22:22:13 wvcii
|
||||
* added two node status types for use in backward error
|
||||
* recovery experiments.
|
||||
*
|
||||
* Revision 1.31 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.30 1996/06/07 22:49:18 jimz
|
||||
* fix up raidPtr typing
|
||||
*
|
||||
* Revision 1.29 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.28 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.27 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.26 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.25 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.24 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.23 1996/05/16 23:05:20 jimz
|
||||
* Added dag_ptrs field, RF_DAG_PTRCACHESIZE
|
||||
*
|
||||
* The dag_ptrs field of the node is basically some scribble
|
||||
* space to be used here. We could get rid of it, and always
|
||||
* allocate the range of pointers, but that's expensive. So,
|
||||
* we pick a "common case" size for the pointer cache. Hopefully,
|
||||
* we'll find that:
|
||||
* (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by
|
||||
* only a little bit (least efficient case)
|
||||
* (2) Generally, nptrs isn't a lot less than RF_DAG_PTRCACHESIZE
|
||||
* (wasted memory)
|
||||
*
|
||||
* Revision 1.22 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.21 1996/05/08 15:23:47 wvcii
|
||||
* added new node states: undone, recover, panic
|
||||
*
|
||||
* Revision 1.20 1995/12/01 14:59:19 root
|
||||
* increased MAX_ANTECEDENTS from 10 to 20
|
||||
* should consider getting rid of this (eliminate static array)
|
||||
*
|
||||
* Revision 1.19 1995/11/30 15:58:59 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.18 1995/11/19 16:27:03 wvcii
|
||||
* created struct dagList
|
||||
*
|
||||
* Revision 1.17 1995/11/07 15:43:01 wvcii
|
||||
* added static array to DAGnode: antType
|
||||
* added commitNode type
|
||||
* added commit node counts to dag header
|
||||
* added ptr (firstDag) to support multi-dag requests
|
||||
* added succedent done/fired counts to nodes to support rollback
|
||||
* added node status type "skipped"
|
||||
* added hdr status types "rollForward, rollBackward"
|
||||
* deleted hdr status type "disable"
|
||||
* updated ResetNode & ResetDAGHeader to zero new fields
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAG_H_
|
||||
#define _RF__RF_DAG_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_stripelocks.h"
|
||||
#include "rf_layout.h"
|
||||
#include "rf_dagflags.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_memchunk.h"
|
||||
|
||||
#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */
|
||||
#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */
|
||||
#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may possess; sizes the antType[] array in RF_DagNode_s */
|
||||
|
||||
#ifdef KERNEL
|
||||
#include <sys/buf.h>
|
||||
#endif /* KERNEL */
|
||||
|
||||
struct RF_PropHeader_s { /* structure for propagation of results: one (result -> parameter) binding */
|
||||
int resultNum; /* index of the result to bind (result # resultNum) */
|
||||
int paramNum; /* index of the parameter it is bound to (parameter # paramNum) */
|
||||
RF_PropHeader_t *next; /* next binding; linked list for multiple results/params */
|
||||
};
|
||||
|
||||
typedef enum RF_NodeStatus_e { /* execution state of a DAG node; stored in RF_DagNode_s.status */
|
||||
rf_bwd1, /* node is ready for undo logging (backward error recovery only) */
|
||||
rf_bwd2, /* node has completed undo logging (backward error recovery only) */
|
||||
rf_wait, /* node is waiting to be executed; RF_ResetNode() puts a node back into this state */
|
||||
rf_fired, /* node is currently executing its do function */
|
||||
rf_good, /* node successfully completed execution of its do function */
|
||||
rf_bad, /* node failed to successfully execute its do function */
|
||||
rf_skipped, /* not used anymore, used to imply a node was not executed */
|
||||
rf_recover, /* node is currently executing its undo function */
|
||||
rf_panic, /* node failed to successfully execute its undo function */
|
||||
rf_undone /* node successfully executed its undo function */
|
||||
} RF_NodeStatus_t;
|
||||
|
||||
/*
|
||||
* These were used to control skipping a node.
|
||||
* Now, these are only used as comments.
|
||||
*/
|
||||
typedef enum RF_AntecedentType_e { /* element type of RF_DagNode_s.antType[]; informational only, per the comment above */
|
||||
rf_trueData, /* NOTE(review): presumably a true data dependence on the antecedent -- confirm */
|
||||
rf_antiData, /* NOTE(review): presumably an anti-dependence -- confirm */
|
||||
rf_outputData, /* NOTE(review): presumably an output dependence -- confirm */
|
||||
rf_control /* NOTE(review): presumably a pure ordering (control) dependence -- confirm */
|
||||
} RF_AntecedentType_t;
|
||||
|
||||
#define RF_DAG_PTRCACHESIZE 40 /* number of entries in RF_DagNode_s.dag_ptrs[] */
|
||||
#define RF_DAG_PARAMCACHESIZE 12 /* number of entries in RF_DagNode_s.dag_params[] */
|
||||
|
||||
typedef RF_uint8 RF_DagNodeFlags_t; /* holds the RF_DAGNODE_FLAG_* bit values defined below */
|
||||
|
||||
struct RF_DagNode_s { /* one node of a DAG: its functions, its edges, and its execution bookkeeping */
|
||||
RF_NodeStatus_t status; /* current status of this node */
|
||||
int (*doFunc)(RF_DagNode_t *); /* normal function */
|
||||
int (*undoFunc)(RF_DagNode_t *); /* func to remove effect of doFunc */
|
||||
int (*wakeFunc)(RF_DagNode_t *, int status); /* func called when the node completes an I/O */
|
||||
int numParams; /* number of entries in params[] (old comments say "*funcPtr"; there is no such member, presumably doFunc) */
|
||||
int numResults; /* number of entries in results[] (same "*funcPtr" caveat as numParams) */
|
||||
int numAntecedents; /* number of antecedents */
|
||||
int numAntDone; /* number of antecedents which have finished; zeroed by RF_ResetNode() */
|
||||
int numSuccedents; /* number of succedents */
|
||||
int numSuccFired; /* incremented when a succedent is fired during forward execution; zeroed by RF_ResetNode() */
|
||||
int numSuccDone; /* incremented when a succedent finishes during rollBackward; zeroed by RF_ResetNode() */
|
||||
int commitNode; /* boolean flag - if true, this is a commit node */
|
||||
RF_DagNode_t **succedents; /* succedents, array size numSuccedents */
|
||||
RF_DagNode_t **antecedents; /* antecedents, array size numAntecedents */
|
||||
RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each antecedent; fixed capacity of RF_MAX_ANTECEDENTS entries */
|
||||
void **results; /* array of results produced by the node function, numResults entries */
|
||||
RF_DagParam_t *params; /* array of parameters required by the node function, numParams entries */
|
||||
RF_PropHeader_t **propList; /* propagation list, size numSuccedents */
|
||||
RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */
|
||||
void *dagFuncData; /* dag execution func uses this for whatever it wants */
|
||||
RF_DagNode_t *next; /* list link; set to NULL by RF_ResetNode() */
|
||||
int nodeNum; /* used by PrintDAG for debug only */
|
||||
int visited; /* used to avoid re-visiting nodes on DAG walks */
|
||||
/* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY
|
||||
* THAT AFTER IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */
|
||||
char *name; /* debug only */
|
||||
RF_DagNodeFlags_t flags; /* RF_DAGNODE_FLAG_* bits, see below */
|
||||
RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance: in-node pointer storage, RF_DAG_PTRCACHESIZE entries */
|
||||
RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance: in-node parameter storage, RF_DAG_PARAMCACHESIZE entries */
|
||||
};
|
||||
|
||||
/*
|
||||
* Bit values for flags field of RF_DagNode_t
|
||||
*/
|
||||
#define RF_DAGNODE_FLAG_NONE 0x00 /* no flag bits set */
|
||||
#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor before firing this node */
|
||||
|
||||
/* enable - DAG ready for normal execution, no errors encountered
|
||||
* rollForward - DAG encountered an error after commit point, rolling forward
|
||||
* rollBackward - DAG encountered an error prior to commit point, rolling backward
|
||||
*/
|
||||
typedef enum RF_DagStatus_e { /* stored in RF_DagHeader_s.status */
|
||||
rf_enable, /* ready for normal execution, no errors encountered; set by RF_ResetDagHeader() */
|
||||
rf_rollForward, /* error encountered after the commit point, rolling forward */
|
||||
rf_rollBackward /* error encountered prior to the commit point, rolling backward */
|
||||
} RF_DagStatus_t;
|
||||
|
||||
#define RF_MAX_HDR_SUCC 1 /* capacity of RF_DagHeader_s.succedents[] */
|
||||
|
||||
#define RF_MAXCHUNKS 10 /* capacity of RF_DagHeader_s.memChunk[] */
|
||||
|
||||
struct RF_DagHeader_s { /* head of one DAG: roots, commit bookkeeping, completion callback and owned memory */
|
||||
RF_DagStatus_t status; /* status of this DAG */
|
||||
int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root (but succedents[] below only has room for RF_MAX_HDR_SUCC) */
|
||||
int numCommitNodes; /* number of commit nodes in graph */
|
||||
int numCommits; /* number of commit nodes which have been fired; zeroed by RF_ResetDagHeader() */
|
||||
RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, size numSuccedents */
|
||||
RF_DagHeader_t *next; /* ptr to allow a list of dags */
|
||||
RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed prior to freeing DAG */
|
||||
RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps to be freed */
|
||||
int nodeNum; /* used by PrintDAG for debug only */
|
||||
int numNodesCompleted; /* zeroed by RF_ResetDagHeader() */
|
||||
RF_AccTraceEntry_t *tracerec; /* perf mon only */
|
||||
|
||||
void (*cbFunc)(void *); /* function to call when the dag completes */
|
||||
void *cbArg; /* argument for cbFunc */
|
||||
char *creator; /* name of function used to create this dag */
|
||||
|
||||
RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG is for */
|
||||
void *bp; /* the bp for this I/O passed down from the file system. ignored outside kernel */
|
||||
|
||||
RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of memory to be retained upon DAG free for re-use */
|
||||
int chunkIndex; /* the idea is to avoid calls to alloc and free */
|
||||
|
||||
RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows SelectAlgorithm to merge memChunks from several dags */
|
||||
int xtraChunkIndex; /* number of ptrs to valid chunks */
|
||||
int xtraChunkCnt; /* number of ptrs to chunks allocated */
|
||||
|
||||
#ifdef SIMULATE
|
||||
int done; /* Tag to tell if termination node has been fired; exists only in SIMULATE builds */
|
||||
#endif /* SIMULATE */
|
||||
};
|
||||
|
||||
struct RF_DagList_s {
|
||||
/* common info for a list of dags which will be fired sequentially */
|
||||
int numDags; /* number of dags in the list */
|
||||
int numDagsFired; /* number of dags in list which have initiated execution */
|
||||
int numDagsDone; /* number of dags in list which have completed execution */
|
||||
RF_DagHeader_t *dags; /* list of dags (RF_DagHeader_s has a next pointer for chaining) */
|
||||
RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */
|
||||
RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user info); embedded by value, unlike the pointer in RF_DagHeader_s */
|
||||
};
|
||||
|
||||
/* Resets a node so that it can be fired again: status goes back to rf_wait, the numAntDone/numSuccFired/numSuccDone counters are zeroed and next is cleared. _n_ is evaluated five times, so pass an expression without side effects. The expansion is a bare { } block, not do { } while (0), so "RF_ResetNode(n);" directly before an "else" will not compile. */
|
||||
#define RF_ResetNode(_n_) { \
|
||||
(_n_)->status = rf_wait; \
|
||||
(_n_)->numAntDone = 0; \
|
||||
(_n_)->numSuccFired = 0; \
|
||||
(_n_)->numSuccDone = 0; \
|
||||
(_n_)->next = NULL; \
|
||||
}
|
||||
|
||||
#ifdef SIMULATE /* RF_ResetDagHeader(_h_): return a DAG header to its pre-execution state (status rf_enable, completion and commit counters zero). The SIMULATE variant also clears the done field. Expands to a bare { } block and evaluates _h_ several times. */
|
||||
#define RF_ResetDagHeader(_h_) { \
|
||||
(_h_)->done = RF_FALSE; \
|
||||
(_h_)->numNodesCompleted = 0; \
|
||||
(_h_)->numCommits = 0; \
|
||||
(_h_)->status = rf_enable; \
|
||||
}
|
||||
#else /* SIMULATE */
|
||||
#define RF_ResetDagHeader(_h_) { \
|
||||
(_h_)->numNodesCompleted = 0; \
|
||||
(_h_)->numCommits = 0; \
|
||||
(_h_)->status = rf_enable; \
|
||||
}
|
||||
#endif /* SIMULATE */
|
||||
|
||||
/* convenience macro for declaring a create dag function: expands to "void _name_(raidPtr, asmap, dag_h, bp, flags, allocList)" with the standard parameter types and no trailing semicolon, so it can head either a prototype or a definition */
|
||||
|
||||
#define RF_CREATE_DAG_FUNC_DECL(_name_) \
|
||||
void _name_ ( \
|
||||
RF_Raid_t *raidPtr, \
|
||||
RF_AccessStripeMap_t *asmap, \
|
||||
RF_DagHeader_t *dag_h, \
|
||||
void *bp, \
|
||||
RF_RaidAccessFlags_t flags, \
|
||||
RF_AllocListElem_t *allocList)
|
||||
|
||||
#endif /* !_RF__RF_DAG_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,87 @@
|
|||
/* $NetBSD: rf_dagdegrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagdegrd.h,v
|
||||
* Revision 1.6 1996/07/31 16:29:06 jimz
|
||||
* asm/asmap re-fix (EO merge)
|
||||
*
|
||||
* Revision 1.5 1996/07/31 15:34:40 jimz
|
||||
* evenodd changes; bugfixes for double-degraded archs, generalize
|
||||
* some formerly PQ-only functions
|
||||
*
|
||||
* Revision 1.4 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.3 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:22:06 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAGDEGRD_H_
|
||||
#define _RF__RF_DAGDEGRD_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/* degraded read DAG creation routines; the RaidFive, RaidOne and RaidC creators all take the same six parameters (raidPtr, asmap, dag_h, bp, flags, allocList), the others take extra arguments */
|
||||
void rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
|
||||
void rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
|
||||
void rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, /* same six parameters plus recFunc */
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
|
||||
RF_RedFuncs_t *recFunc);
|
||||
void rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
|
||||
void rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, /* pdap, nNodep, pqpdap and nPQNodep are pointer parameters, presumably outputs -- confirm in rf_dagdegrd.c */
|
||||
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap,
|
||||
int *nNodep, RF_PhysDiskAddr_t **pqpdap, int *nPQNodep,
|
||||
RF_AllocListElem_t *allocList);
|
||||
void rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap, /* same six parameters plus two node names and a recovery function of the doFunc signature */
|
||||
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList, char *redundantReadNodeName,
|
||||
char *recoveryNodeName, int (*recovFunc)(RF_DagNode_t *));
|
||||
|
||||
#endif /* !_RF__RF_DAGDEGRD_H_ */
|
|
@ -0,0 +1,968 @@
|
|||
/* $NetBSD: rf_dagdegwr.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_dagdegwr.c
|
||||
*
|
||||
* code for creating degraded write DAGs
|
||||
*
|
||||
* :
|
||||
* Log: rf_dagdegwr.c,v
|
||||
* Revision 1.23 1996/11/05 21:10:40 jimz
|
||||
* failed pda generalization
|
||||
*
|
||||
* Revision 1.22 1996/08/23 14:49:48 jimz
|
||||
* remove bogus assert from small write double deg DAG generator
|
||||
*
|
||||
* Revision 1.21 1996/08/21 05:09:44 jimz
|
||||
* get rid of bogus fakery in DoubleDegSmallWrite
|
||||
*
|
||||
* Revision 1.20 1996/08/21 04:14:35 jimz
|
||||
* cleanup doubledegsmallwrite
|
||||
* NOTE: we need doubledeglargewrite
|
||||
*
|
||||
* Revision 1.19 1996/08/19 21:39:38 jimz
|
||||
* CommonCreateSimpleDegradedWriteDAG() was unable to correctly create DAGs for
|
||||
* complete stripe overwrite accesses- it assumed the necessity to read old
|
||||
* data. Rather than do the "right" thing, and risk breaking a critical DAG so
|
||||
* close to release, I made a no-op read node to stick in and link up in this
|
||||
* case. Seems to work.
|
||||
*
|
||||
* Revision 1.18 1996/07/31 15:35:34 jimz
|
||||
* evenodd changes; bugfixes for double-degraded archs, generalize
|
||||
* some formerly PQ-only functions
|
||||
*
|
||||
* Revision 1.17 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.16 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.15 1996/07/27 16:30:19 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.14 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.13 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.12 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.11 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.10 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.9 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.8 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.7 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.6 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.5 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.4 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:21:50 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_memchunk.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_dagdegwr.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* General comments on DAG creation:
|
||||
*
|
||||
* All DAGs in this file use roll-away error recovery. Each DAG has a single
|
||||
* commit node, usually called "Cmt." If an error occurs before the Cmt node
|
||||
* is reached, the execution engine will halt forward execution and work
|
||||
* backward through the graph, executing the undo functions. Assuming that
|
||||
* each node in the graph prior to the Cmt node is undoable and atomic - or -
|
||||
* does not make changes to permanent state, the graph will fail atomically.
|
||||
* If an error occurs after the Cmt node executes, the engine will roll-forward
|
||||
* through the graph, blindly executing nodes until it reaches the end.
|
||||
* If a graph reaches the end, it is assumed to have completed successfully.
|
||||
*
|
||||
* A graph has only 1 Cmt node.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* The following wrappers map the standard DAG creation interface to the
|
||||
* DAG creation routines. Additionally, these wrappers enable experimentation
|
||||
* with new DAG structures by providing an extra level of indirection, allowing
|
||||
* the DAG creation routines to be replaced at this single point.
|
||||
*/
|
||||
|
||||
static RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG) /* file-local wrapper with the standard create-DAG parameter list */
|
||||
{
|
||||
rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, /* forwards all six arguments unchanged ... */
|
||||
flags, allocList,1, rf_RecoveryXorFunc, RF_TRUE); /* ... with nfaults = 1, redFunc = rf_RecoveryXorFunc, allowBufferRecycle = RF_TRUE */
|
||||
}
|
||||
|
||||
void rf_CreateDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList) /* public entry point: checks preconditions, then builds the simple degraded-write DAG */
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
RF_DagHeader_t *dag_h;
|
||||
void *bp;
|
||||
RF_RaidAccessFlags_t flags;
|
||||
RF_AllocListElem_t *allocList;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0]; /* only the first failed PDA is examined */
|
||||
|
||||
RF_ASSERT( asmap->numDataFailed == 1 ); /* exactly one failed data unit is expected */
|
||||
dag_h->creator = "DegradedWriteDAG";
|
||||
|
||||
/* if the access writes only a portion of the failed unit, and also writes
|
||||
* some portion of at least one surviving unit, we create two DAGs, one for
|
||||
* the failed component and one for the non-failed component, and do them
|
||||
* sequentially. Note that the fact that we're accessing only a portion of
|
||||
* the failed unit indicates that the access either starts or ends in the
|
||||
* failed unit, and hence we need create only two dags. This is inefficient
|
||||
* in that the same data or parity can get read and written twice using this
|
||||
* structure. I need to fix this to do the access all at once.
|
||||
*/
|
||||
RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)); /* NOTE: no two-DAG split is done in this function; the case described above (more than one unit accessed with a partial failed unit) is only asserted not to occur */
|
||||
rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList); /* always a single DAG */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* DAG creation code begins here
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode
|
||||
* write, which is as follows
|
||||
*
|
||||
* / {Wnq} --\
|
||||
* hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term
|
||||
* \ {Rod} / \ Wnd ---/
|
||||
* \ {Wnd} -/
|
||||
*
|
||||
* commit nodes: Xor, Wnd
|
||||
*
|
||||
* IMPORTANT:
|
||||
* This DAG generator does not work for double-degraded archs since it does not
|
||||
* generate Q
|
||||
*
|
||||
* This dag is essentially identical to the large-write dag, except that the
|
||||
* write to the failed data unit is suppressed.
|
||||
*
|
||||
* IMPORTANT: this dag does not work in the case where the access writes only
|
||||
* a portion of the failed unit, and also writes some portion of at least one
|
||||
* surviving SU. this case is handled in CreateDegradedWriteDAG above.
|
||||
*
|
||||
* The block & unblock nodes are leftovers from a previous version. They
|
||||
* do nothing, but I haven't deleted them because it would be a tremendous
|
||||
* effort to put them back in.
|
||||
*
|
||||
* This dag is used whenever one of the data units in a write has failed.
|
||||
* If it is the parity unit that failed, the nonredundant write dag (below)
|
||||
* is used.
|
||||
*****************************************************************************/
|
||||
|
||||
/*
 * rf_CommonCreateSimpleDegradedWriteDAG
 *
 * Populates dag_h with a degraded-write DAG for an access in which exactly
 * one data unit has failed (asserted below).  The graph built here is:
 *   header -> block -> Rrd reads -> xor -> commit -> {Wnd..., Wnp[, Wnq]}
 *          -> unblock -> term
 *
 * raidPtr            array descriptor; its Layout is consulted and the
 *                    pointer itself is the final parameter of the xor node
 * asmap              stripe map of the access; failedPDAs[0] is the failed unit
 * dag_h              DAG header that is filled in
 * bp, flags          not referenced anywhere in this function
 * allocList          allocation list that nodes, PDAs and buffers are added to
 * nfaults            number of redundancy writes: Wnp is always created,
 *                    Wnq only when nfaults == 2
 * redFunc            function installed as the do-function of the xor node
 * allowBufferRecycle when nonzero, a read buffer from new_asm_h[1] whose
 *                    sector count equals the failed PDA's is reused as the
 *                    xor target instead of allocating a fresh one
 */
void rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
|
||||
allocList, nfaults, redFunc, allowBufferRecycle)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
RF_DagHeader_t *dag_h;
|
||||
void *bp;
|
||||
RF_RaidAccessFlags_t flags;
|
||||
RF_AllocListElem_t *allocList;
|
||||
int nfaults;
|
||||
int (*redFunc)(RF_DagNode_t *);
|
||||
int allowBufferRecycle;
|
||||
{
|
||||
int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, rdnodesFaked;
|
||||
RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode;
|
||||
RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode;
|
||||
RF_SectorCount_t sectorsPerSU;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
char *xorTargetBuf = NULL; /* the target buffer for the XOR operation */
|
||||
char *overlappingPDAs; /* a temporary array of flags */
|
||||
RF_AccessStripeMapHeader_t *new_asm_h[2];
|
||||
RF_PhysDiskAddr_t *pda, *parityPDA;
|
||||
RF_StripeNum_t parityStripeID;
|
||||
RF_PhysDiskAddr_t *failedPDA;
|
||||
RF_RaidLayout_t *layoutPtr;
|
||||
|
||||
layoutPtr = &(raidPtr->Layout);
|
||||
parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress,
|
||||
&which_ru);
|
||||
sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
|
||||
/* failedPDA points to the pda within the asm that targets the failed disk */
|
||||
failedPDA = asmap->failedPDAs[0];
|
||||
|
||||
if (rf_dagDebug)
|
||||
printf("[Creating degraded-write DAG]\n");
|
||||
|
||||
RF_ASSERT( asmap->numDataFailed == 1 );
|
||||
dag_h->creator = "SimpleDegradedWriteDAG";
|
||||
|
||||
/*
|
||||
* Generate two ASMs identifying the surviving data
|
||||
* we need in order to recover the lost data.
|
||||
*/
|
||||
/* overlappingPDAs array must be zero'd */
|
||||
/* one flag per accessed stripe unit; released with RF_Free once the xor params are built */
RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *));
|
||||
rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h,
|
||||
&nXorBufs, NULL, overlappingPDAs, allocList);
|
||||
|
||||
/* create all the nodes at once */
|
||||
nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is generated
|
||||
* for the failed pda */
|
||||
|
||||
/* one Rrd node per stripe unit named by either generated ASM (either may be NULL) */
nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
|
||||
((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
|
||||
/*
|
||||
* XXX
|
||||
*
|
||||
* There's a bug with a complete stripe overwrite- that means 0 reads
|
||||
* of old data, and the rest of the DAG generation code doesn't like
|
||||
* that. A release is coming, and I don't wanna risk breaking a critical
|
||||
* DAG generator, so here's what I'm gonna do- if there's no read nodes,
|
||||
* I'm gonna fake there being a read node, and I'm gonna swap in a
|
||||
* no-op node in its place (to make all the link-up code happy).
|
||||
* This should be fixed at some point. --jimz
|
||||
*/
|
||||
if (nRrdNodes == 0) {
|
||||
nRrdNodes = 1;
|
||||
rdnodesFaked = 1;
|
||||
}
|
||||
else {
|
||||
rdnodesFaked = 0;
|
||||
}
|
||||
/* block, commit, unblock, term, xor (5) + Wnp/Wnq (nfaults) + Wnd + Rrd */
|
||||
nNodes = 5 + nfaults + nWndNodes + nRrdNodes;
|
||||
RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t),
|
||||
(RF_DagNode_t *), allocList);
|
||||
/* carve the single nodes[] allocation into the individual nodes and node arrays */
i = 0;
|
||||
blockNode = &nodes[i]; i += 1;
|
||||
commitNode = &nodes[i]; i += 1;
|
||||
unblockNode = &nodes[i]; i += 1;
|
||||
termNode = &nodes[i]; i += 1;
|
||||
xorNode = &nodes[i]; i += 1;
|
||||
wnpNode = &nodes[i]; i += 1;
|
||||
wndNodes = &nodes[i]; i += nWndNodes;
|
||||
rrdNodes = &nodes[i]; i += nRrdNodes;
|
||||
if (nfaults == 2) {
|
||||
wnqNode = &nodes[i]; i += 1;
|
||||
}
|
||||
else {
|
||||
wnqNode = NULL;
|
||||
}
|
||||
/* every allocated node must have been handed out exactly once */
RF_ASSERT(i == nNodes);
|
||||
|
||||
/* this dag can not commit until all rrd and xor Nodes have completed */
|
||||
dag_h->numCommitNodes = 1;
|
||||
dag_h->numCommits = 0;
|
||||
dag_h->numSuccedents = 1;
|
||||
|
||||
RF_ASSERT( nRrdNodes > 0 );
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList);
|
||||
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
|
||||
NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
/* xor node: 2*nXorBufs+2 params and nfaults results, filled in further down */
rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1,
|
||||
nRrdNodes, 2*nXorBufs+2, nfaults, dag_h, "Xrc", allocList);
|
||||
|
||||
/*
|
||||
* Fill in the Rrd nodes. If any of the rrd buffers are the same size as
|
||||
* the failed buffer, save a pointer to it so we can use it as the target
|
||||
* of the XOR. The pdas in the rrd nodes have been range-restricted, so if
|
||||
* a buffer is the same size as the failed buffer, it must also be at the
|
||||
* same alignment within the SU.
|
||||
*/
|
||||
i = 0;
|
||||
if (new_asm_h[0]) {
|
||||
for (i=0, pda=new_asm_h[0]->stripeMap->physInfo;
|
||||
i<new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
|
||||
i++, pda=pda->next)
|
||||
{
|
||||
rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
|
||||
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
|
||||
RF_ASSERT(pda);
|
||||
rrdNodes[i].params[0].p = pda;
|
||||
rrdNodes[i].params[1].p = pda->bufPtr;
|
||||
rrdNodes[i].params[2].v = parityStripeID;
|
||||
rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
}
|
||||
}
|
||||
/* i now equals the number of stripe units accessed in new_asm_h[0] */
|
||||
if (new_asm_h[1]) {
|
||||
for (j=0,pda=new_asm_h[1]->stripeMap->physInfo;
|
||||
j<new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
|
||||
j++, pda=pda->next)
|
||||
{
|
||||
rf_InitNode(&rrdNodes[i+j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
|
||||
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
|
||||
RF_ASSERT(pda);
|
||||
rrdNodes[i+j].params[0].p = pda;
|
||||
rrdNodes[i+j].params[1].p = pda->bufPtr;
|
||||
rrdNodes[i+j].params[2].v = parityStripeID;
|
||||
rrdNodes[i+j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
/* buffer recycling is only attempted for reads from new_asm_h[1]; the last match wins */
if (allowBufferRecycle && (pda->numSector == failedPDA->numSector))
|
||||
xorTargetBuf = pda->bufPtr;
|
||||
}
|
||||
}
|
||||
if (rdnodesFaked) {
|
||||
/*
|
||||
* This is where we'll init that fake noop read node
|
||||
* (XXX should the wakeup func be different?)
|
||||
*/
|
||||
rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, 1, 1, 0, 0, dag_h, "RrN", allocList);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make a PDA for the parity unit. The parity PDA should start at
|
||||
* the same offset into the SU as the failed PDA.
|
||||
*/
|
||||
/*
|
||||
* Danner comment:
|
||||
* I don't think this copy is really necessary.
|
||||
* We are in one of two cases here.
|
||||
* (1) The entire failed unit is written. Then asmap->parityInfo will
|
||||
* describe the entire parity.
|
||||
* (2) We are only writing a subset of the failed unit and nothing
|
||||
* else. Then the asmap->parityInfo describes the failed unit and
|
||||
* the copy can also be avoided.
|
||||
*/
|
||||
|
||||
/* only row, col, startSector and numSector of the new parity PDA are assigned here */
RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
|
||||
parityPDA->row = asmap->parityInfo->row;
|
||||
parityPDA->col = asmap->parityInfo->col;
|
||||
parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
|
||||
* sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
|
||||
parityPDA->numSector = failedPDA->numSector;
|
||||
|
||||
/* no read buffer was recycled, so allocate a zeroed xor target the size of the failed access */
if (!xorTargetBuf) {
|
||||
RF_CallocAndAdd(xorTargetBuf, 1,
|
||||
rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
|
||||
}
|
||||
|
||||
/* init the Wnp node */
|
||||
rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
|
||||
wnpNode->params[0].p = parityPDA;
|
||||
wnpNode->params[1].p = xorTargetBuf;
|
||||
wnpNode->params[2].v = parityStripeID;
|
||||
wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
|
||||
/* fill in the Wnq Node */
|
||||
if (nfaults == 2) {
|
||||
{
|
||||
/* parityPDA is reused as a scratch pointer for the Q unit's PDA */
RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
|
||||
(RF_PhysDiskAddr_t *), allocList);
|
||||
parityPDA->row = asmap->qInfo->row;
|
||||
parityPDA->col = asmap->qInfo->col;
|
||||
parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU)
|
||||
* sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
|
||||
parityPDA->numSector = failedPDA->numSector;
|
||||
|
||||
rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
|
||||
wnqNode->params[0].p = parityPDA;
|
||||
/* the xor node's second result buffer is what the Wnq node writes */
RF_CallocAndAdd(xorNode->results[1], 1,
|
||||
rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
|
||||
wnqNode->params[1].p = xorNode->results[1];
|
||||
wnqNode->params[2].v = parityStripeID;
|
||||
wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
}
|
||||
}
|
||||
|
||||
/* fill in the Wnd nodes */
|
||||
/* the failed PDA gets no node: i-- cancels the loop's i++ so wndNodes[] stays dense */
for (pda=asmap->physInfo, i=0; i<nWndNodes; i++, pda=pda->next) {
|
||||
if (pda == failedPDA) {
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
|
||||
RF_ASSERT(pda);
|
||||
wndNodes[i].params[0].p = pda;
|
||||
wndNodes[i].params[1].p = pda->bufPtr;
|
||||
wndNodes[i].params[2].v = parityStripeID;
|
||||
wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
}
|
||||
|
||||
/* fill in the results of the xor node */
|
||||
xorNode->results[0] = xorTargetBuf;
|
||||
|
||||
/* fill in the params of the xor node */
|
||||
|
||||
paramNum=0;
|
||||
/* a faked (no-op) read node has no pda/buffer params, so it contributes nothing */
if (rdnodesFaked == 0) {
|
||||
for (i=0; i<nRrdNodes; i++) {
|
||||
/* all the Rrd nodes need to be xored together */
|
||||
xorNode->params[paramNum++] = rrdNodes[i].params[0];
|
||||
xorNode->params[paramNum++] = rrdNodes[i].params[1];
|
||||
}
|
||||
}
|
||||
for (i=0; i < nWndNodes; i++) {
|
||||
/* any Wnd nodes that overlap the failed access need to be xored in */
|
||||
if (overlappingPDAs[i]) {
|
||||
/* work on a private copy so the Wnd node's own PDA is not range-restricted */
RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
|
||||
bcopy((char *)wndNodes[i].params[0].p, (char *)pda, sizeof(RF_PhysDiskAddr_t));
|
||||
rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
|
||||
xorNode->params[paramNum++].p = pda;
|
||||
xorNode->params[paramNum++].p = pda->bufPtr;
|
||||
}
|
||||
}
|
||||
RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
|
||||
|
||||
/*
|
||||
* Install the failed PDA into the xor param list so that the
|
||||
* new data gets xor'd in.
|
||||
*/
|
||||
xorNode->params[paramNum++].p = failedPDA;
|
||||
xorNode->params[paramNum++].p = failedPDA->bufPtr;
|
||||
|
||||
/*
|
||||
* The last 2 params to the recovery xor node are always the failed
|
||||
* PDA and the raidPtr. install the failedPDA even though we have just
|
||||
* done so above. This allows us to use the same XOR function for both
|
||||
* degraded reads and degraded writes.
|
||||
*/
|
||||
xorNode->params[paramNum++].p = failedPDA;
|
||||
xorNode->params[paramNum++].p = raidPtr;
|
||||
/* must match the param count the xor node was initialized with */
RF_ASSERT( paramNum == 2*nXorBufs+2 );
|
||||
|
||||
/*
|
||||
* Code to link nodes begins here
|
||||
*/
|
||||
|
||||
/* link header to block node */
|
||||
RF_ASSERT(blockNode->numAntecedents == 0);
|
||||
dag_h->succedents[0] = blockNode;
|
||||
|
||||
/* link block node to rd nodes */
|
||||
RF_ASSERT(blockNode->numSuccedents == nRrdNodes);
|
||||
for (i = 0; i < nRrdNodes; i++) {
|
||||
RF_ASSERT(rrdNodes[i].numAntecedents == 1);
|
||||
blockNode->succedents[i] = &rrdNodes[i];
|
||||
rrdNodes[i].antecedents[0] = blockNode;
|
||||
rrdNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* link read nodes to xor node*/
|
||||
RF_ASSERT(xorNode->numAntecedents == nRrdNodes);
|
||||
for (i = 0; i < nRrdNodes; i++) {
|
||||
RF_ASSERT(rrdNodes[i].numSuccedents == 1);
|
||||
rrdNodes[i].succedents[0] = xorNode;
|
||||
xorNode->antecedents[i] = &rrdNodes[i];
|
||||
xorNode->antType[i] = rf_trueData;
|
||||
}
|
||||
|
||||
/* link xor node to commit node */
|
||||
RF_ASSERT(xorNode->numSuccedents == 1);
|
||||
RF_ASSERT(commitNode->numAntecedents == 1);
|
||||
xorNode->succedents[0] = commitNode;
|
||||
commitNode->antecedents[0] = xorNode;
|
||||
commitNode->antType[0] = rf_control;
|
||||
|
||||
/* link commit node to wnd nodes */
|
||||
RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes);
|
||||
for (i = 0; i < nWndNodes; i++) {
|
||||
RF_ASSERT(wndNodes[i].numAntecedents == 1);
|
||||
commitNode->succedents[i] = &wndNodes[i];
|
||||
wndNodes[i].antecedents[0] = commitNode;
|
||||
wndNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* link the commit node to wnp, wnq nodes */
|
||||
RF_ASSERT(wnpNode->numAntecedents == 1);
|
||||
commitNode->succedents[nWndNodes] = wnpNode;
|
||||
wnpNode->antecedents[0] = commitNode;
|
||||
wnpNode->antType[0] = rf_control;
|
||||
if (nfaults == 2) {
|
||||
RF_ASSERT(wnqNode->numAntecedents == 1);
|
||||
commitNode->succedents[nWndNodes + 1] = wnqNode;
|
||||
wnqNode->antecedents[0] = commitNode;
|
||||
wnqNode->antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* link write new data nodes to unblock node */
|
||||
RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults));
|
||||
for(i = 0; i < nWndNodes; i++) {
|
||||
RF_ASSERT(wndNodes[i].numSuccedents == 1);
|
||||
wndNodes[i].succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[i] = &wndNodes[i];
|
||||
unblockNode->antType[i] = rf_control;
|
||||
}
|
||||
|
||||
/* link write new parity node to unblock node */
|
||||
RF_ASSERT(wnpNode->numSuccedents == 1);
|
||||
wnpNode->succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[nWndNodes] = wnpNode;
|
||||
unblockNode->antType[nWndNodes] = rf_control;
|
||||
|
||||
/* link write new q node to unblock node */
|
||||
if (nfaults == 2) {
|
||||
RF_ASSERT(wnqNode->numSuccedents == 1);
|
||||
wnqNode->succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[nWndNodes+1] = wnqNode;
|
||||
unblockNode->antType[nWndNodes+1] = rf_control;
|
||||
}
|
||||
|
||||
/* link unblock node to term node */
|
||||
RF_ASSERT(unblockNode->numSuccedents == 1);
|
||||
RF_ASSERT(termNode->numAntecedents == 1);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
unblockNode->succedents[0] = termNode;
|
||||
termNode->antecedents[0] = unblockNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/*
 * CONS_PDA(info, start, num)
 *
 * Fill in the physical disk address that pda_p points at so that it
 * describes num sectors beginning start sectors into the stripe unit that
 * holds asmap-><info> (info is a member name such as parityInfo or qInfo),
 * terminate its ->next link, and attach a buffer for num sectors obtained
 * through RF_MallocAndAdd.
 *
 * Relies on pda_p, asmap, secPerSU, raidPtr and allocList being in scope at
 * the point of use.  The body is wrapped in do { } while (0) so the macro
 * acts as a single statement (safe under an unbraced if/else), and start
 * and num are parenthesized so that compound expressions keep their
 * meaning.  num is evaluated twice; do not pass an expression with side
 * effects.
 */
#define CONS_PDA(info,start,num) \
  do { \
    pda_p->row = asmap->info->row; \
    pda_p->col = asmap->info->col; \
    pda_p->startSector = ((asmap->info->startSector / secPerSU) * secPerSU) + (start); \
    pda_p->numSector = (num); \
    pda_p->next = NULL; \
    RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, (num)), (char *), allocList); \
  } while (0)
|
||||
|
||||
void rf_WriteGenerateFailedAccessASMs(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_PhysDiskAddr_t **pdap,
|
||||
int *nNodep,
|
||||
RF_PhysDiskAddr_t **pqpdap,
|
||||
int *nPQNodep,
|
||||
RF_AllocListElem_t *allocList)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
int PDAPerDisk,i;
|
||||
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
|
||||
int numDataCol = layoutPtr->numDataCol;
|
||||
int state;
|
||||
unsigned napdas;
|
||||
RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end;
|
||||
RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
|
||||
RF_PhysDiskAddr_t *pda_p;
|
||||
RF_RaidAddr_t sosAddr;
|
||||
|
||||
/* determine how many pda's we will have to generate per unaccess stripe.
|
||||
If there is only one failed data unit, it is one; if two, possibly two,
|
||||
depending wether they overlap. */
|
||||
|
||||
fone_start = rf_StripeUnitOffset(layoutPtr,fone->startSector);
|
||||
fone_end = fone_start + fone->numSector;
|
||||
|
||||
if (asmap->numDataFailed==1)
|
||||
{
|
||||
PDAPerDisk = 1;
|
||||
state = 1;
|
||||
RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
|
||||
pda_p = *pqpdap;
|
||||
/* build p */
|
||||
CONS_PDA(parityInfo,fone_start,fone->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
pda_p++;
|
||||
/* build q */
|
||||
CONS_PDA(qInfo,fone_start,fone->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_Q;
|
||||
}
|
||||
else
|
||||
{
|
||||
ftwo_start = rf_StripeUnitOffset(layoutPtr,ftwo->startSector);
|
||||
ftwo_end = ftwo_start + ftwo->numSector;
|
||||
if (fone->numSector + ftwo->numSector > secPerSU)
|
||||
{
|
||||
PDAPerDisk = 1;
|
||||
state = 2;
|
||||
RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
|
||||
pda_p = *pqpdap;
|
||||
CONS_PDA(parityInfo,0,secPerSU);
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
pda_p++;
|
||||
CONS_PDA(qInfo,0,secPerSU);
|
||||
pda_p->type = RF_PDA_TYPE_Q;
|
||||
}
|
||||
else
|
||||
{
|
||||
PDAPerDisk = 2;
|
||||
state = 3;
|
||||
/* four of them, fone, then ftwo */
|
||||
RF_MallocAndAdd(*pqpdap,4*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
|
||||
pda_p = *pqpdap;
|
||||
CONS_PDA(parityInfo,fone_start,fone->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
pda_p++;
|
||||
CONS_PDA(qInfo,fone_start,fone->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_Q;
|
||||
pda_p++;
|
||||
CONS_PDA(parityInfo,ftwo_start,ftwo->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
pda_p++;
|
||||
CONS_PDA(qInfo,ftwo_start,ftwo->numSector);
|
||||
pda_p->type = RF_PDA_TYPE_Q;
|
||||
}
|
||||
}
|
||||
/* figure out number of nonaccessed pda */
|
||||
napdas = PDAPerDisk * (numDataCol - 2);
|
||||
*nPQNodep = PDAPerDisk;
|
||||
|
||||
*nNodep = napdas;
|
||||
if (napdas == 0) return; /* short circuit */
|
||||
|
||||
/* allocate up our list of pda's */
|
||||
|
||||
RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
|
||||
*pdap = pda_p;
|
||||
|
||||
/* linkem together */
|
||||
for (i=0; i < (napdas-1); i++)
|
||||
pda_p[i].next = pda_p+(i+1);
|
||||
|
||||
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
||||
for (i=0; i < numDataCol; i++)
|
||||
{
|
||||
if ((pda_p - (*pdap)) == napdas)
|
||||
continue;
|
||||
pda_p->type = RF_PDA_TYPE_DATA;
|
||||
pda_p->raidAddress = sosAddr + (i * secPerSU);
|
||||
(raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
|
||||
/* skip over dead disks */
|
||||
if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
|
||||
continue;
|
||||
switch (state)
|
||||
{
|
||||
case 1: /* fone */
|
||||
pda_p->numSector = fone->numSector;
|
||||
pda_p->raidAddress += fone_start;
|
||||
pda_p->startSector += fone_start;
|
||||
RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
|
||||
break;
|
||||
case 2: /* full stripe */
|
||||
pda_p->numSector = secPerSU;
|
||||
RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList);
|
||||
break;
|
||||
case 3: /* two slabs */
|
||||
pda_p->numSector = fone->numSector;
|
||||
pda_p->raidAddress += fone_start;
|
||||
pda_p->startSector += fone_start;
|
||||
RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
|
||||
pda_p++;
|
||||
pda_p->type = RF_PDA_TYPE_DATA;
|
||||
pda_p->raidAddress = sosAddr + (i * secPerSU);
|
||||
(raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
|
||||
pda_p->numSector = ftwo->numSector;
|
||||
pda_p->raidAddress += ftwo_start;
|
||||
pda_p->startSector += ftwo_start;
|
||||
RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
|
||||
break;
|
||||
default:
|
||||
RF_PANIC();
|
||||
}
|
||||
pda_p++;
|
||||
}
|
||||
|
||||
RF_ASSERT (pda_p - *pdap == napdas);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Yield the pointer stored as parameter 0 of the disk I/O node that node points to. */
#define DISK_NODE_PDA(node)  ((node)->params[0].p)

/*
 * DISK_NODE_PARAMS(_node_, _p_)
 *
 * Fill in the four parameters of a disk I/O node: the PDA, the PDA's
 * buffer, the parity stripe ID, and the packed priority/recon-unit word.
 * _node_ is the node itself (an lvalue, not a pointer); _p_ is a pointer
 * to the PDA.
 *
 * Relies on parityStripeID and which_ru being in scope at the point of
 * use.  The body is wrapped in do { } while (0) so the macro acts as a
 * single statement (safe under an unbraced if/else).  _p_ is evaluated
 * twice; do not pass an expression with side effects.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
  do { \
    (_node_).params[0].p = (_p_); \
    (_node_).params[1].p = (_p_)->bufPtr; \
    (_node_).params[2].v = parityStripeID; \
    (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); \
  } while (0)
|
||||
|
||||
void rf_DoubleDegSmallWrite(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
char *redundantReadNodeName,
|
||||
char *redundantWriteNodeName,
|
||||
char *recoveryNodeName,
|
||||
int (*recovFunc)(RF_DagNode_t *))
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, *unblockNode, *rpNodes,*rqNodes, *wpNodes, *wqNodes, *termNode;
|
||||
RF_PhysDiskAddr_t *pda, *pqPDAs;
|
||||
RF_PhysDiskAddr_t *npdas;
|
||||
int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
int nPQNodes;
|
||||
RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);
|
||||
|
||||
/* simple small write case -
|
||||
First part looks like a reconstruct-read of the failed data units.
|
||||
Then a write of all data units not failed. */
|
||||
|
||||
|
||||
/*
|
||||
Hdr
|
||||
|
|
||||
------Block-
|
||||
/ / \
|
||||
Rrd Rrd ... Rrd Rp Rq
|
||||
\ \ /
|
||||
-------PQ-----
|
||||
/ \ \
|
||||
Wud Wp WQ
|
||||
\ | /
|
||||
--Unblock-
|
||||
|
|
||||
T
|
||||
|
||||
Rrd = read recovery data (potentially none)
|
||||
Wud = write user data (not incl. failed disks)
|
||||
Wp = Write P (could be two)
|
||||
Wq = Write Q (could be two)
|
||||
|
||||
*/
|
||||
|
||||
rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes,allocList);
|
||||
|
||||
RF_ASSERT(asmap->numDataFailed == 1);
|
||||
|
||||
nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
|
||||
nReadNodes = nRrdNodes + 2*nPQNodes;
|
||||
nWriteNodes = nWudNodes+ 2*nPQNodes;
|
||||
nNodes = 4 + nReadNodes + nWriteNodes;
|
||||
|
||||
RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
|
||||
blockNode = nodes;
|
||||
unblockNode = blockNode+1;
|
||||
termNode = unblockNode+1;
|
||||
recoveryNode = termNode+1;
|
||||
rrdNodes = recoveryNode+1;
|
||||
rpNodes = rrdNodes + nRrdNodes;
|
||||
rqNodes = rpNodes + nPQNodes;
|
||||
wudNodes = rqNodes + nPQNodes;
|
||||
wpNodes = wudNodes + nWudNodes;
|
||||
wqNodes = wpNodes + nPQNodes;
|
||||
|
||||
dag_h->creator = "PQ_DDSimpleSmallWrite";
|
||||
dag_h->numSuccedents = 1;
|
||||
dag_h->succedents[0] = blockNode;
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
termNode->antecedents[0] = unblockNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
|
||||
/* init the block and unblock nodes */
|
||||
/* The block node has all the read nodes as successors */
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
for (i=0; i < nReadNodes; i++)
|
||||
blockNode->succedents[i] = rrdNodes+i;
|
||||
|
||||
/* The unblock node has all the writes as successors */
|
||||
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList);
|
||||
for (i=0; i < nWriteNodes; i++) {
|
||||
unblockNode->antecedents[i] = wudNodes+i;
|
||||
unblockNode->antType[i] = rf_control;
|
||||
}
|
||||
unblockNode->succedents[0] = termNode;
|
||||
|
||||
#define INIT_READ_NODE(node,name) \
|
||||
rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
|
||||
(node)->succedents[0] = recoveryNode; \
|
||||
(node)->antecedents[0] = blockNode; \
|
||||
(node)->antType[0] = rf_control;
|
||||
|
||||
/* build the read nodes */
|
||||
pda = npdas;
|
||||
for (i=0; i < nRrdNodes; i++, pda = pda->next) {
|
||||
INIT_READ_NODE(rrdNodes+i,"rrd");
|
||||
DISK_NODE_PARAMS(rrdNodes[i],pda);
|
||||
}
|
||||
|
||||
/* read redundancy pdas */
|
||||
pda = pqPDAs;
|
||||
INIT_READ_NODE(rpNodes,"Rp");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(rpNodes[0],pda);
|
||||
pda++;
|
||||
INIT_READ_NODE(rqNodes, redundantReadNodeName );
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(rqNodes[0],pda);
|
||||
if (nPQNodes==2)
|
||||
{
|
||||
pda++;
|
||||
INIT_READ_NODE(rpNodes+1,"Rp");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(rpNodes[1],pda);
|
||||
pda++;
|
||||
INIT_READ_NODE(rqNodes+1,redundantReadNodeName );
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(rqNodes[1],pda);
|
||||
}
|
||||
|
||||
/* the recovery node has all reads as precedessors and all writes as successors.
|
||||
It generates a result for every write P or write Q node.
|
||||
As parameters, it takes a pda per read and a pda per stripe of user data written.
|
||||
It also takes as the last params the raidPtr and asm.
|
||||
For results, it takes PDA for P & Q. */
|
||||
|
||||
|
||||
rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
|
||||
nWriteNodes, /* succesors */
|
||||
nReadNodes, /* preds */
|
||||
nReadNodes + nWudNodes + 3, /* params */
|
||||
2 * nPQNodes, /* results */
|
||||
dag_h, recoveryNodeName, allocList);
|
||||
|
||||
|
||||
|
||||
for (i=0; i < nReadNodes; i++ )
|
||||
{
|
||||
recoveryNode->antecedents[i] = rrdNodes+i;
|
||||
recoveryNode->antType[i] = rf_control;
|
||||
recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes+i);
|
||||
}
|
||||
for (i=0; i < nWudNodes; i++)
|
||||
{
|
||||
recoveryNode->succedents[i] = wudNodes+i;
|
||||
}
|
||||
recoveryNode->params[nReadNodes+nWudNodes].p = asmap->failedPDAs[0];
|
||||
recoveryNode->params[nReadNodes+nWudNodes+1].p = raidPtr;
|
||||
recoveryNode->params[nReadNodes+nWudNodes+2].p = asmap;
|
||||
|
||||
for ( ; i < nWriteNodes; i++)
|
||||
recoveryNode->succedents[i] = wudNodes+i;
|
||||
|
||||
pda = pqPDAs;
|
||||
recoveryNode->results[0] = pda;
|
||||
pda++;
|
||||
recoveryNode->results[1] = pda;
|
||||
if ( nPQNodes == 2)
|
||||
{
|
||||
pda++;
|
||||
recoveryNode->results[2] = pda;
|
||||
pda++;
|
||||
recoveryNode->results[3] = pda;
|
||||
}
|
||||
|
||||
/* fill writes */
|
||||
#define INIT_WRITE_NODE(node,name) \
|
||||
rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
|
||||
(node)->succedents[0] = unblockNode; \
|
||||
(node)->antecedents[0] = recoveryNode; \
|
||||
(node)->antType[0] = rf_control;
|
||||
|
||||
pda = asmap->physInfo;
|
||||
for (i=0; i < nWudNodes; i++)
|
||||
{
|
||||
INIT_WRITE_NODE(wudNodes+i,"Wd");
|
||||
DISK_NODE_PARAMS(wudNodes[i],pda);
|
||||
recoveryNode->params[nReadNodes+i].p = DISK_NODE_PDA(wudNodes+i);
|
||||
pda = pda->next;
|
||||
}
|
||||
/* write redundancy pdas */
|
||||
pda = pqPDAs;
|
||||
INIT_WRITE_NODE(wpNodes,"Wp");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(wpNodes[0],pda);
|
||||
pda++;
|
||||
INIT_WRITE_NODE(wqNodes,"Wq");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(wqNodes[0],pda);
|
||||
if (nPQNodes==2)
|
||||
{
|
||||
pda++;
|
||||
INIT_WRITE_NODE(wpNodes+1,"Wp");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(wpNodes[1],pda);
|
||||
pda++;
|
||||
INIT_WRITE_NODE(wqNodes+1,"Wq");
|
||||
RF_ASSERT(pda);
|
||||
DISK_NODE_PARAMS(wqNodes[1],pda);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/* $NetBSD: rf_dagdegwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagdegwr.h,v
|
||||
* Revision 1.6 1996/07/31 16:30:28 jimz
|
||||
* asm/asmap fix (EO merge)
|
||||
*
|
||||
* Revision 1.5 1996/07/31 15:35:38 jimz
|
||||
* evenodd changes; bugfixes for double-degraded archs, generalize
|
||||
* some formerly PQ-only functions
|
||||
*
|
||||
* Revision 1.4 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.3 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:21:28 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
/* include guard: the declarations below are seen at most once per translation unit */
#ifndef _RF__RF_DAGDEGWR_H_
|
||||
#define _RF__RF_DAGDEGWR_H_
|
||||
|
||||
/* degraded write DAG creation routines */
|
||||
/*
 * Degraded-write DAG creation entry point.
 * NOTE(review): the definition is not visible alongside this header;
 * confirm its contract against rf_dagdegwr.c.
 */
void rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
|
||||
/*
 * Builds the simple degraded-write DAG for an access with exactly one failed
 * data unit.  nfaults selects how many redundancy writes are generated
 * (a Q write is added when it is 2), redFunc is run by the DAG's xor node,
 * and a nonzero allowBufferRecycle lets a matching read buffer double as
 * the xor target.
 */
void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
|
||||
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
|
||||
int nfaults, int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle);
|
||||
/*
 * Generates the PDA lists for a double-degraded small write: data PDAs are
 * returned through pdap (count in *nNodep) and P/Q PDAs through pqpdap
 * (number of P/Q pairs in *nPQNodep).
 */
void rf_WriteGenerateFailedAccessASMs(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap,
|
||||
int *nNodep, RF_PhysDiskAddr_t **pqpdap,
|
||||
int *nPQNodep, RF_AllocListElem_t *allocList);
|
||||
/*
 * Builds the double-degraded small-write DAG.  The three name arguments
 * label DAG nodes and recovFunc is the function run by the recovery node.
 */
void rf_DoubleDegSmallWrite(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList, char *redundantReadNodeName,
|
||||
char *redundantWriteNodeName, char *recoveryNodeName,
|
||||
int (*recovFunc)(RF_DagNode_t *));
|
||||
|
||||
#endif /* !_RF__RF_DAGDEGWR_H_ */
|
|
@ -0,0 +1,499 @@
|
|||
/* $NetBSD: rf_dagffrd.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_dagffrd.c
|
||||
*
|
||||
* code for creating fault-free read DAGs
|
||||
*
|
||||
* :
|
||||
* Log: rf_dagffrd.c,v
|
||||
* Revision 1.14 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.13 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.12 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.11 1996/06/06 17:30:44 jimz
|
||||
* turn old Raid1 mirror read creation into a more generic function
|
||||
* parameterized by an additional parameter: type of mirrored read
|
||||
* this is now used by other dag creation routines so chained declustering
|
||||
* and raid1 can share dag creation code, but have different mirroring
|
||||
* policies
|
||||
*
|
||||
* Revision 1.10 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.9 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.8 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.7 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.6 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.5 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.4 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:19:20 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_memchunk.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_dagffrd.h"
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* General comments on DAG creation:
|
||||
*
|
||||
* All DAGs in this file use roll-away error recovery. Each DAG has a single
|
||||
* commit node, usually called "Cmt." If an error occurs before the Cmt node
|
||||
* is reached, the execution engine will halt forward execution and work
|
||||
* backward through the graph, executing the undo functions. Assuming that
|
||||
* each node in the graph prior to the Cmt node is undoable and atomic - or -
|
||||
* does not make changes to permanent state, the graph will fail atomically.
|
||||
* If an error occurs after the Cmt node executes, the engine will roll-forward
|
||||
* through the graph, blindly executing nodes until it reaches the end.
|
||||
* If a graph reaches the end, it is assumed to have completed successfully.
|
||||
*
|
||||
* A graph has only 1 Cmt node.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* The following wrappers map the standard DAG creation interface to the
|
||||
* DAG creation routines. Additionally, these wrappers enable experimentation
|
||||
* with new DAG structures by providing an extra level of indirection, allowing
|
||||
* the DAG creation routines to be replaced at this single point.
|
||||
*/
|
||||
|
||||
void rf_CreateFaultFreeReadDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList)
|
||||
{
|
||||
rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
|
||||
RF_IO_TYPE_READ);
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* DAG creation code begins here
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* creates a DAG to perform a nonredundant read or write of data within one
|
||||
* stripe.
|
||||
* For reads, this DAG is as follows:
|
||||
*
|
||||
* /---- read ----\
|
||||
* Header -- Block ---- read ---- Commit -- Terminate
|
||||
* \---- read ----/
|
||||
*
|
||||
* For writes, this DAG is as follows:
|
||||
*
|
||||
* /---- write ----\
|
||||
* Header -- Commit ---- write ---- Block -- Terminate
|
||||
* \---- write ----/
|
||||
*
|
||||
* There is one disk node per stripe unit accessed, and all disk nodes are in
|
||||
* parallel.
|
||||
*
|
||||
* Tricky point here: The first disk node (read or write) is created
|
||||
* normally. Subsequent disk nodes are created by copying the first one,
|
||||
* and modifying a few params. The "succedents" and "antecedents" fields are
|
||||
* _not_ re-created in each node, but rather left pointing to the same array
|
||||
* that was malloc'd when the first node was created. Thus, it's essential
|
||||
* that when this DAG is freed, the succedents and antecedents fields be freed
|
||||
* in ONLY ONE of the read nodes. This does not apply to the "params" field
|
||||
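* because it is recreated for each READ node.
* NOTE(review): the function below calls rf_InitNode() on every disk node
* rather than copying the first one, so this caveat may be out of date;
* confirm against rf_InitNode() and the DAG free path before relying on it.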
|
||||
*
|
||||
* Note that normal-priority accesses do not need to be tagged with their
|
||||
* parity stripe ID, because they will never be promoted. Hence, I've
|
||||
* commented-out the code to do this, and marked it with UNNEEDED.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
void rf_CreateNonredundantDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
RF_IoType_t type)
|
||||
{
|
||||
RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode;
|
||||
RF_PhysDiskAddr_t *pda = asmap->physInfo;
|
||||
int (*doFunc)(RF_DagNode_t *), (*undoFunc)(RF_DagNode_t *);
|
||||
int i, n, totalNumNodes;
|
||||
char *name;
|
||||
|
||||
n = asmap->numStripeUnitsAccessed;
|
||||
dag_h->creator = "NonredundantDAG";
|
||||
|
||||
RF_ASSERT(RF_IO_IS_R_OR_W(type));
|
||||
switch (type) {
|
||||
case RF_IO_TYPE_READ:
|
||||
doFunc = rf_DiskReadFunc;
|
||||
undoFunc = rf_DiskReadUndoFunc;
|
||||
name = "R ";
|
||||
if (rf_dagDebug) printf("[Creating non-redundant read DAG]\n");
|
||||
break;
|
||||
case RF_IO_TYPE_WRITE:
|
||||
doFunc = rf_DiskWriteFunc;
|
||||
undoFunc = rf_DiskWriteUndoFunc;
|
||||
name = "W ";
|
||||
if (rf_dagDebug) printf("[Creating non-redundant write DAG]\n");
|
||||
break;
|
||||
default:
|
||||
RF_PANIC();
|
||||
}
|
||||
|
||||
/*
|
||||
* For reads, the dag can not commit until the block node is reached.
|
||||
* for writes, the dag commits immediately.
|
||||
*/
|
||||
dag_h->numCommitNodes = 1;
|
||||
dag_h->numCommits = 0;
|
||||
dag_h->numSuccedents = 1;
|
||||
|
||||
/*
|
||||
* Node count:
|
||||
* 1 block node
|
||||
* n data reads (or writes)
|
||||
* 1 commit node
|
||||
* 1 terminator node
|
||||
*/
|
||||
RF_ASSERT(n > 0);
|
||||
totalNumNodes = n + 3;
|
||||
RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
|
||||
(RF_DagNode_t *), allocList);
|
||||
i = 0;
|
||||
diskNodes = &nodes[i]; i += n;
|
||||
blockNode = &nodes[i]; i += 1;
|
||||
commitNode = &nodes[i]; i += 1;
|
||||
termNode = &nodes[i]; i += 1;
|
||||
RF_ASSERT(i == totalNumNodes);
|
||||
|
||||
/* initialize nodes */
|
||||
switch (type) {
|
||||
case RF_IO_TYPE_READ:
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
|
||||
NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
break;
|
||||
case RF_IO_TYPE_WRITE:
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
|
||||
NULL, n, 1, 0, 0, dag_h, "Cmt", allocList);
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
|
||||
NULL, 0, n, 0, 0, dag_h, "Trm", allocList);
|
||||
break;
|
||||
default:
|
||||
RF_PANIC();
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
RF_ASSERT(pda != NULL);
|
||||
rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc,
|
||||
1, 1, 4, 0, dag_h, name, allocList);
|
||||
diskNodes[i].params[0].p = pda;
|
||||
diskNodes[i].params[1].p = pda->bufPtr;
|
||||
/* parity stripe id is not necessary */
|
||||
diskNodes[i].params[2].v = 0;
|
||||
diskNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
pda = pda->next;
|
||||
}
|
||||
|
||||
/*
|
||||
* Connect nodes.
|
||||
*/
|
||||
|
||||
/* connect hdr to block node */
|
||||
RF_ASSERT(blockNode->numAntecedents == 0);
|
||||
dag_h->succedents[0] = blockNode;
|
||||
|
||||
if (type == RF_IO_TYPE_READ) {
|
||||
/* connecting a nonredundant read DAG */
|
||||
RF_ASSERT(blockNode->numSuccedents == n);
|
||||
RF_ASSERT(commitNode->numAntecedents == n);
|
||||
for (i=0; i < n; i++) {
|
||||
/* connect block node to each read node */
|
||||
RF_ASSERT(diskNodes[i].numAntecedents == 1);
|
||||
blockNode->succedents[i] = &diskNodes[i];
|
||||
diskNodes[i].antecedents[0] = blockNode;
|
||||
diskNodes[i].antType[0] = rf_control;
|
||||
|
||||
/* connect each read node to the commit node */
|
||||
RF_ASSERT(diskNodes[i].numSuccedents == 1);
|
||||
diskNodes[i].succedents[0] = commitNode;
|
||||
commitNode->antecedents[i] = &diskNodes[i];
|
||||
commitNode->antType[i] = rf_control;
|
||||
}
|
||||
/* connect the commit node to the term node */
|
||||
RF_ASSERT(commitNode->numSuccedents == 1);
|
||||
RF_ASSERT(termNode->numAntecedents == 1);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
commitNode->succedents[0] = termNode;
|
||||
termNode->antecedents[0] = commitNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
}
|
||||
else {
|
||||
/* connecting a nonredundant write DAG */
|
||||
/* connect the block node to the commit node */
|
||||
RF_ASSERT(blockNode->numSuccedents == 1);
|
||||
RF_ASSERT(commitNode->numAntecedents == 1);
|
||||
blockNode->succedents[0] = commitNode;
|
||||
commitNode->antecedents[0] = blockNode;
|
||||
commitNode->antType[0] = rf_control;
|
||||
|
||||
RF_ASSERT(commitNode->numSuccedents == n);
|
||||
RF_ASSERT(termNode->numAntecedents == n);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
for (i=0; i < n; i++) {
|
||||
/* connect the commit node to each write node */
|
||||
RF_ASSERT(diskNodes[i].numAntecedents == 1);
|
||||
commitNode->succedents[i] = &diskNodes[i];
|
||||
diskNodes[i].antecedents[0] = commitNode;
|
||||
diskNodes[i].antType[0] = rf_control;
|
||||
|
||||
/* connect each write node to the term node */
|
||||
RF_ASSERT(diskNodes[i].numSuccedents == 1);
|
||||
diskNodes[i].succedents[0] = termNode;
|
||||
termNode->antecedents[i] = &diskNodes[i];
|
||||
termNode->antType[i] = rf_control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* Create a fault-free read DAG for RAID level 1
|
||||
*
|
||||
* Hdr -> Nil -> Rmir -> Cmt -> Trm
|
||||
*
|
||||
* The "Rmir" node schedules a read from the disk in the mirror pair with the
|
||||
* shortest disk queue. the proper queue is selected at Rmir execution. this
|
||||
* deferred mapping is unlike other archs in RAIDframe which generally fix
|
||||
* mapping at DAG creation time.
|
||||
*
|
||||
* Parameters: raidPtr - description of the physical array
|
||||
* asmap - logical & physical addresses for this access
|
||||
* bp - buffer ptr (for holding read data)
|
||||
* flags - general flags (e.g. disk locking)
|
||||
* allocList - list of memory allocated in DAG creation
|
||||
*****************************************************************************/
|
||||
|
||||
static void CreateMirrorReadDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
int (*readfunc)(RF_DagNode_t *node))
|
||||
{
|
||||
RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
|
||||
RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
|
||||
RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
|
||||
int i, n, totalNumNodes;
|
||||
|
||||
n = asmap->numStripeUnitsAccessed;
|
||||
dag_h->creator = "RaidOneReadDAG";
|
||||
if (rf_dagDebug) {
|
||||
printf("[Creating RAID level 1 read DAG]\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* This dag can not commit until the commit node is reached
|
||||
* errors prior to the commit point imply the dag has failed.
|
||||
*/
|
||||
dag_h->numCommitNodes = 1;
|
||||
dag_h->numCommits = 0;
|
||||
dag_h->numSuccedents = 1;
|
||||
|
||||
/*
|
||||
* Node count:
|
||||
* n data reads
|
||||
* 1 block node
|
||||
* 1 commit node
|
||||
* 1 terminator node
|
||||
*/
|
||||
RF_ASSERT(n > 0);
|
||||
totalNumNodes = n + 3;
|
||||
RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
|
||||
(RF_DagNode_t *), allocList);
|
||||
i = 0;
|
||||
readNodes = &nodes[i]; i += n;
|
||||
blockNode = &nodes[i]; i += 1;
|
||||
commitNode = &nodes[i]; i += 1;
|
||||
termNode = &nodes[i]; i += 1;
|
||||
RF_ASSERT(i == totalNumNodes);
|
||||
|
||||
/* initialize nodes */
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
|
||||
rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
|
||||
rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
|
||||
rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
RF_ASSERT(data_pda != NULL);
|
||||
RF_ASSERT(parity_pda != NULL);
|
||||
rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
|
||||
rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h,
|
||||
"Rmir", allocList);
|
||||
readNodes[i].params[0].p = data_pda;
|
||||
readNodes[i].params[1].p = data_pda->bufPtr;
|
||||
/* parity stripe id is not necessary */
|
||||
readNodes[i].params[2].p = 0;
|
||||
readNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
readNodes[i].params[4].p = parity_pda;
|
||||
data_pda = data_pda->next;
|
||||
parity_pda = parity_pda->next;
|
||||
}
|
||||
|
||||
/*
|
||||
* Connect nodes
|
||||
*/
|
||||
|
||||
/* connect hdr to block node */
|
||||
RF_ASSERT(blockNode->numAntecedents == 0);
|
||||
dag_h->succedents[0] = blockNode;
|
||||
|
||||
/* connect block node to read nodes */
|
||||
RF_ASSERT(blockNode->numSuccedents == n);
|
||||
for (i=0; i < n; i++) {
|
||||
RF_ASSERT(readNodes[i].numAntecedents == 1);
|
||||
blockNode->succedents[i] = &readNodes[i];
|
||||
readNodes[i].antecedents[0] = blockNode;
|
||||
readNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* connect read nodes to commit node */
|
||||
RF_ASSERT(commitNode->numAntecedents == n);
|
||||
for (i=0; i < n; i++) {
|
||||
RF_ASSERT(readNodes[i].numSuccedents == 1);
|
||||
readNodes[i].succedents[0] = commitNode;
|
||||
commitNode->antecedents[i] = &readNodes[i];
|
||||
commitNode->antType[i] = rf_control;
|
||||
}
|
||||
|
||||
/* connect commit node to term node */
|
||||
RF_ASSERT(commitNode->numSuccedents == 1);
|
||||
RF_ASSERT(termNode->numAntecedents == 1);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
commitNode->succedents[0] = termNode;
|
||||
termNode->antecedents[0] = commitNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
}
|
||||
|
||||
void rf_CreateMirrorIdleReadDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList)
|
||||
{
|
||||
CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
|
||||
rf_DiskReadMirrorIdleFunc);
|
||||
}
|
||||
|
||||
void rf_CreateMirrorPartitionReadDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList)
|
||||
{
|
||||
CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
|
||||
rf_DiskReadMirrorPartitionFunc);
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
/* $NetBSD: rf_dagffrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagffrd.h,v
|
||||
* Revision 1.5 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.4 1996/06/06 17:31:13 jimz
|
||||
* new mirror read creation dags
|
||||
*
|
||||
* Revision 1.3 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:19:53 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAGFFRD_H_
#define _RF__RF_DAGFFRD_H_

#include "rf_types.h"

/*
 * Fault-free read DAG creation routines.
 *
 * Three of the four share the same six-argument signature;
 * rf_CreateNonredundantDAG() additionally takes the I/O type.
 */

void rf_CreateFaultFreeReadDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

void rf_CreateNonredundantDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    RF_IoType_t type);

void rf_CreateMirrorIdleReadDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

void rf_CreateMirrorPartitionReadDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

#endif /* !_RF__RF_DAGFFRD_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,102 @@
|
|||
/* $NetBSD: rf_dagffwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagffwr.h,v
|
||||
* Revision 1.6 1996/07/31 15:35:29 jimz
|
||||
* evenodd changes; bugfixes for double-degraded archs, generalize
|
||||
* some formerly PQ-only functions
|
||||
*
|
||||
* Revision 1.5 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.4 1996/06/10 22:25:28 wvcii
|
||||
* added write dags which do not have a commit node and are
|
||||
* used in forward and backward error recovery experiments.
|
||||
*
|
||||
* Revision 1.3 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/03 19:20:18 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAGFFWR_H_
#define _RF__RF_DAGFFWR_H_

#include "rf_types.h"

/*
 * Fault-free write DAG creation routines.
 *
 * Every routine starts with the same six arguments (raidPtr, asmap, dag_h,
 * bp, flags, allocList); the groups below differ only in what follows them.
 */

/* --- six common arguments plus the I/O type --- */

void rf_CreateNonRedundantWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    RF_IoType_t type);

void rf_CreateRAID0WriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    RF_IoType_t type);

/* --- six common arguments only --- */

void rf_CreateSmallWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

void rf_CreateLargeWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

/* --- plus fault count, redundancy function and buffer-recycle switch --- */

void rf_CommonCreateLargeWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    int nfaults,
    int (*redFunc)(RF_DagNode_t *),
    int allowBufferRecycle);

void rf_CommonCreateLargeWriteDAGFwd(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    int nfaults,
    int (*redFunc)(RF_DagNode_t *),
    int allowBufferRecycle);

/* --- plus two RF_RedFuncs_t tables (pfuncs, qfuncs) --- */

void rf_CommonCreateSmallWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    RF_RedFuncs_t *pfuncs,
    RF_RedFuncs_t *qfuncs);

void rf_CommonCreateSmallWriteDAGFwd(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList,
    RF_RedFuncs_t *pfuncs,
    RF_RedFuncs_t *qfuncs);

/* --- RAID level 1 writes: six common arguments only --- */

void rf_CreateRaidOneWriteDAG(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

void rf_CreateRaidOneWriteDAGFwd(
    RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList);

#endif /* !_RF__RF_DAGFFWR_H_ */
|
|
@ -0,0 +1,85 @@
|
|||
/* $NetBSD: rf_dagflags.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/**************************************************************************************
|
||||
*
|
||||
* dagflags.h -- flags that can be given to DoAccess
|
||||
* I pulled these out of dag.h because routines that call DoAccess may need these flags,
|
||||
* but certainly do not need the declarations related to the DAG data structures.
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_dagflags.h,v
|
||||
* Revision 1.10 1996/06/13 19:08:23 jimz
|
||||
* remove unused BD flag
|
||||
*
|
||||
* Revision 1.9 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.8 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.7 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.6 1995/12/01 15:59:40 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAGFLAGS_H_
#define _RF__RF_DAGFLAGS_H_

/*
 * Bit values for the "flags" argument (RF_RaidAccessFlags_t) accepted by
 * DoAccess, SelectAlgorithm, and the DAG creation routines.  Each flag
 * occupies its own bit, so flags may be OR'ed together.
 *
 * If USE_DAG or USE_ASM is specified, neither the DAG nor the ASM
 * will be modified, which means that you can't SUPPRESS if you
 * specify USE_DAG.
 */

/* no flags */
#define RF_DAG_FLAGS_NONE         0

/* suppress all stripe locks in the DAG */
#define RF_DAG_SUPPRESS_LOCKS     (1 << 0)

/* create an ASM and return it instead of freeing it */
#define RF_DAG_RETURN_ASM         (1 << 1)

/* create a DAG and return it instead of freeing it */
#define RF_DAG_RETURN_DAG         (1 << 2)

/* cause DoAccess to be non-blocking */
#define RF_DAG_NONBLOCKING_IO     (1 << 3)

/* the access is complete */
#define RF_DAG_ACCESS_COMPLETE    (1 << 4)

/* used to handle the case where the dag invokes no I/O */
#define RF_DAG_DISPATCH_RETURNED  (1 << 5)

/* this access came through rf_ioctl instead of rf_strategy */
#define RF_DAG_TEST_ACCESS        (1 << 6)

#endif /* !_RF__RF_DAGFLAGS_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,137 @@
|
|||
/* $NetBSD: rf_dagfuncs.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, William V. Courtright II, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* dagfuncs.h -- header file for DAG node execution routines
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagfuncs.h,v
|
||||
* Revision 1.17 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.16 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.15 1996/06/06 17:27:20 jimz
|
||||
* added another read mirror func (partitioning), changed names so dag
|
||||
* creation routines can use the appropriate one
|
||||
*
|
||||
* Revision 1.14 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.13 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.12 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.11 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.10 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.9 1995/12/01 15:56:46 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.8 1995/11/07 16:25:23 wvcii
|
||||
* added DiskUnlockFuncForThreads
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DAGFUNCS_H_
|
||||
#define _RF__RF_DAGFUNCS_H_
|
||||
|
||||
int rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp);
|
||||
int rf_TerminateFunc(RF_DagNode_t *node);
|
||||
int rf_TerminateUndoFunc(RF_DagNode_t *node);
|
||||
int rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node);
|
||||
int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node);
|
||||
int rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node);
|
||||
int rf_ParityLogUpdateFunc(RF_DagNode_t *node);
|
||||
int rf_ParityLogOverwriteFunc(RF_DagNode_t *node);
|
||||
int rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node);
|
||||
int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node);
|
||||
int rf_NullNodeFunc(RF_DagNode_t *node);
|
||||
int rf_NullNodeUndoFunc(RF_DagNode_t *node);
|
||||
int rf_DiskReadFuncForThreads(RF_DagNode_t *node);
|
||||
int rf_DiskWriteFuncForThreads(RF_DagNode_t *node);
|
||||
int rf_DiskUndoFunc(RF_DagNode_t *node);
|
||||
int rf_DiskUnlockFuncForThreads(RF_DagNode_t *node);
|
||||
int rf_GenericWakeupFunc(RF_DagNode_t *node, int status);
|
||||
int rf_RegularXorFunc(RF_DagNode_t *node);
|
||||
int rf_SimpleXorFunc(RF_DagNode_t *node);
|
||||
int rf_RecoveryXorFunc(RF_DagNode_t *node);
|
||||
int rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
|
||||
char *targbuf, void *bp);
|
||||
int rf_bxor(char *src, char *dest, int len, void *bp);
|
||||
int rf_longword_bxor(register unsigned long *src, register unsigned long *dest,
|
||||
int len, void *bp);
|
||||
int rf_longword_bxor3(register unsigned long *dest, register unsigned long *a,
|
||||
register unsigned long *b, register unsigned long *c, int len, void *bp);
|
||||
int rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
|
||||
unsigned char *c, unsigned long len, void *bp);
|
||||
|
||||
/* function ptrs defined in ConfigureDAGFuncs() */
|
||||
extern int (*rf_DiskReadFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_DiskWriteFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_DiskReadUndoFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_DiskWriteUndoFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_DiskUnlockFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_DiskUnlockUndoFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_SimpleXorUndoFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_RegularXorUndoFunc)(RF_DagNode_t *);
|
||||
extern int (*rf_RecoveryXorUndoFunc)(RF_DagNode_t *);
|
||||
|
||||
/*
 * Macros for manipulating the param[3] in a read or write node.
 *
 * Layout of the packed value, as built by RF_CREATE_PARAM3 and taken
 * apart by the RF_EXTRACT_* macros:
 *
 *	bits  0-3	priority	(pri)
 *	bit   4		lock flag	(lk, any nonzero value sets it)
 *	bit   5		unlock flag	(unlk, any nonzero value sets it)
 *	bits  8-31	RU field	(wru, low 24 bits only)
 *
 * Every macro argument is parenthesized so that expression arguments
 * are masked as a whole, and the RU field is widened to RF_uint64
 * before it is shifted so the shift cannot overflow a plain int.
 */
#define RF_CREATE_PARAM3(pri, lk, unlk, wru) \
	((RF_uint64)(((((RF_uint64)(wru)) & 0xFFFFFF) << 8) | \
	    ((lk) ? 0x10 : 0) | ((unlk) ? 0x20 : 0) | ((pri) & 0xF)))
#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F)
#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1)
#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1)
#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF)
|
||||
|
||||
#endif /* !_RF__RF_DAGFUNCS_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,191 @@
|
|||
/* $NetBSD: rf_dagutils.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*************************************************************************
|
||||
*
|
||||
* rf_dagutils.h -- header file for utility routines for manipulating DAGs
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_dagutils.h,v
|
||||
* Revision 1.19 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.18 1996/07/15 17:22:18 jimz
|
||||
* nit-pick code cleanup
|
||||
* resolve stdlib problems on DEC OSF
|
||||
*
|
||||
* Revision 1.17 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.16 1996/06/06 17:27:46 jimz
|
||||
* added another select mirror func (partitioning), changed names so dag
|
||||
* creation routines can use the appropriate one
|
||||
*
|
||||
* fixed old idle mirror func to pick closest arm if queue lengths are equal
|
||||
*
|
||||
* Revision 1.15 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.14 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.13 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.12 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.11 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.10 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.9 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.8 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.7 1996/05/03 19:55:27 wvcii
|
||||
* added misc routines from old dag creation files
|
||||
*
|
||||
* Revision 1.6 1995/12/01 15:57:28 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.5 1995/11/07 16:21:36 wvcii
|
||||
* modified InitNode and InitNodeFromBuf prototypes
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_general.h"
|
||||
|
||||
#ifndef _RF__RF_DAGUTILS_H_
|
||||
#define _RF__RF_DAGUTILS_H_
|
||||
|
||||
struct RF_RedFuncs_s {
|
||||
int (*regular)(RF_DagNode_t *);
|
||||
char *RegularName;
|
||||
int (*simple)(RF_DagNode_t *);
|
||||
char *SimpleName;
|
||||
};
|
||||
|
||||
extern RF_RedFuncs_t rf_xorFuncs;
|
||||
extern RF_RedFuncs_t rf_xorRecoveryFuncs;
|
||||
|
||||
void rf_InitNode(RF_DagNode_t *node, RF_NodeStatus_t initstatus,
|
||||
int commit,
|
||||
int (*doFunc)(RF_DagNode_t *node),
|
||||
int (*undoFunc)(RF_DagNode_t *node),
|
||||
int (*wakeFunc)(RF_DagNode_t *node, int status),
|
||||
int nSucc, int nAnte, int nParam, int nResult,
|
||||
RF_DagHeader_t *hdr, char *name, RF_AllocListElem_t *alist);
|
||||
|
||||
void rf_FreeDAG(RF_DagHeader_t *dag_h);
|
||||
|
||||
RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum,
|
||||
int paramNum, RF_PropHeader_t *next, RF_AllocListElem_t *allocList);
|
||||
|
||||
int rf_ConfigureDAGs(RF_ShutdownList_t **listp);
|
||||
|
||||
RF_DagHeader_t *rf_AllocDAGHeader(void);
|
||||
|
||||
void rf_FreeDAGHeader(RF_DagHeader_t *dh);
|
||||
|
||||
void *rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h,
|
||||
RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList);
|
||||
|
||||
char *rf_NodeStatusString(RF_DagNode_t *node);
|
||||
|
||||
void rf_PrintNodeInfoString(RF_DagNode_t *node);
|
||||
|
||||
int rf_AssignNodeNums(RF_DagHeader_t *dag_h);
|
||||
|
||||
int rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited);
|
||||
|
||||
void rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr);
|
||||
|
||||
void rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr);
|
||||
|
||||
void rf_PrintDAGList(RF_DagHeader_t *dag_h);
|
||||
|
||||
int rf_ValidateDAG(RF_DagHeader_t *dag_h);
|
||||
|
||||
void rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
|
||||
|
||||
void rf_MapUnaccessedPortionOfStripe(RF_Raid_t *raidPtr,
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h,
|
||||
RF_AccessStripeMapHeader_t **new_asm_h, int *nRodNodes, char **sosBuffer,
|
||||
char **eosBuffer, RF_AllocListElem_t *allocList);
|
||||
|
||||
int rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src,
|
||||
RF_PhysDiskAddr_t *dest);
|
||||
|
||||
void rf_GenerateFailedAccessASMs(RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t *failedPDA,
|
||||
RF_DagHeader_t *dag_h, RF_AccessStripeMapHeader_t **new_asm_h,
|
||||
int *nXorBufs, char **rpBufPtr, char *overlappingPDAs,
|
||||
RF_AllocListElem_t *allocList);
|
||||
|
||||
/* flags used by RangeRestrictPDA */
|
||||
#define RF_RESTRICT_NOBUFFER 0
|
||||
#define RF_RESTRICT_DOBUFFER 1
|
||||
|
||||
void rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src,
|
||||
RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr);
|
||||
|
||||
int rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda);
|
||||
void rf_SelectMirrorDiskIdle(RF_DagNode_t *node);
|
||||
void rf_SelectMirrorDiskPartition(RF_DagNode_t *node);
|
||||
|
||||
#endif /* !_RF__RF_DAGUTILS_H_ */
|
|
@ -0,0 +1,577 @@
|
|||
/* $NetBSD: rf_debugMem.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Daniel Stodolsky, Mark Holland, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* debugMem.c: memory usage debugging stuff.
|
||||
* Malloc, Calloc, and Free are #defined everywhere
|
||||
* to do_malloc, do_calloc, and do_free.
|
||||
*
|
||||
* if RF_UTILITY is nonzero, it means we're compiling one of the
|
||||
* raidframe utility programs, such as rfctrl or smd. In this
|
||||
* case, we eliminate all references to the threads package
|
||||
* and to the allocation list stuff.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_debugMem.c,v
|
||||
* Revision 1.38 1996/08/20 14:45:43 jimz
|
||||
* add debugging to track memory allocated (amount only, w/out
|
||||
* excessive sanity checking)
|
||||
*
|
||||
* Revision 1.37 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.36 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.35 1996/06/13 08:55:38 jimz
|
||||
* make error messages refer to file, line of original
|
||||
* allocation
|
||||
*
|
||||
* Revision 1.34 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.33 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.32 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.31 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.30 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.29 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.28 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.27 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.26 1996/05/21 18:53:46 jimz
|
||||
* return NULL for failed allocations, not panic
|
||||
*
|
||||
* Revision 1.25 1996/05/20 16:14:19 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.24 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.23 1996/05/17 12:42:35 jimz
|
||||
* wrap get_threadid stuff in #ifndef UTILITY for utils which use
|
||||
* redzone allocation stuff
|
||||
*
|
||||
* Revision 1.22 1996/05/16 23:06:09 jimz
|
||||
* don't warn about NULL alists
|
||||
*
|
||||
* Revision 1.21 1996/05/16 22:25:02 jimz
|
||||
* show allocations for [MC]allocAndAdd
|
||||
*
|
||||
* Revision 1.20 1996/05/15 18:30:22 jimz
|
||||
* print memory allocation as well as frees if memDebug > 1
|
||||
*
|
||||
* Revision 1.19 1996/05/07 17:41:17 jimz
|
||||
* add "level 2" for memDebug, which will print freed address ranges
|
||||
*
|
||||
* Revision 1.18 1996/05/02 20:41:53 jimz
|
||||
* really fix malloc problem out-of-kernel in memory_hash_insert()
|
||||
*
|
||||
* Revision 1.17 1996/05/02 20:04:29 jimz
|
||||
* fixed malloc deadlock previous change introduced
|
||||
*
|
||||
* Revision 1.16 1996/05/01 16:27:26 jimz
|
||||
* get rid of ALLOCMH
|
||||
* stop using ccmn_ memory management
|
||||
*
|
||||
* Revision 1.15 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.14 1995/12/01 15:56:17 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_options.h"
|
||||
#else /* RF_UTILITY == 0 */
|
||||
#include "rf_utility.h"
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
#ifndef KERNEL
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#endif /* !KERNEL */
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_general.h"
|
||||
|
||||
static long tot_mem_in_use = 0, max_mem = 0;
|
||||
|
||||
/* Hash table of information about memory allocations */
|
||||
#define RF_MH_TABLESIZE 1000
|
||||
|
||||
/*
 * One record in the allocation-tracking hash table (mh_table).
 * Records are created by memory_hash_insert() and are never unlinked:
 * memory_hash_remove() only clears the allocated flag, so the record
 * is reused if the same address is handed out again.
 */
struct mh_struct {
	void			*address;	/* tracked pointer (hash key) */
	int			 size;		/* size given at allocation */
	int			 line;		/* source line of the allocation */
	char			*filen;		/* source file of the allocation */
	char			 allocated;	/* 1 while live, 0 once freed */
	struct mh_struct	*next;		/* next record in the same bucket */
};
|
||||
static struct mh_struct *mh_table[RF_MH_TABLESIZE];
|
||||
RF_DECLARE_MUTEX(rf_debug_mem_mutex)
|
||||
static int mh_table_initialized=0;
|
||||
|
||||
static void memory_hash_insert(void *addr, int size, int line, char *filen);
|
||||
static int memory_hash_remove(void *addr, int sz);
|
||||
|
||||
#ifndef KERNEL /* no redzones or "real_" routines in the kernel */
|
||||
|
||||
static void rf_redzone_free_failed(void *ptr, int size, int line, char *file);
|
||||
|
||||
/*
 * Allocate _size_ usable bytes wrapped in a 16-byte envelope:
 *
 *	[0 .. 7]			header; _size_ is stored here as a long
 *	[8 .. _size_+7]			caller's bytes
 *	[_size_+8 .. _size_+15]		trailer; first and last byte set to '!'
 *
 * Returns a pointer to the caller's bytes (8 past the start of the
 * underlying malloc block), or NULL if malloc fails.
 */
void *rf_real_redzone_malloc(_size_)
	int _size_;
{
	char *base;
	char *trailer;

	rf_validate_mh_table();
	base = malloc(_size_ + 16);
	if (base == NULL)
		return (base);
	RF_ASSERT(base);

	/* header: remember the size so the free side can find the trailer */
	*((long *) base) = _size_;

	/* trailer: marker bytes checked by rf_real_redzone_free() */
	trailer = base + _size_ + 8;
	trailer[0] = '!';
	trailer[7] = '!';

	return (base + 8);
}
|
||||
|
||||
/*
 * Zero-filled counterpart of rf_real_redzone_malloc(): allocates
 * _n_ * _size_ usable bytes plus the 16-byte envelope, clears the
 * whole block, then writes the size header and the two '!' trailer
 * markers.  Returns a pointer to the caller's bytes, or NULL if
 * malloc fails.
 */
void *rf_real_redzone_calloc(_n_,_size_)
	int _n_,_size_;
{
	char *base;
	char *trailer;
	int nbytes;

	rf_validate_mh_table();
	nbytes = _n_ * _size_;
	base = malloc(nbytes + 16);
	if (base == NULL)
		return (base);
	bzero(base, nbytes + 16);

	/* header holds the usable size; trailer holds the markers */
	*((long *) base) = nbytes;
	trailer = base + nbytes + 8;
	trailer[0] = '!';
	trailer[7] = '!';

	return (base + 8);
}
|
||||
|
||||
/*
 * Release a block obtained from rf_real_redzone_malloc() or
 * rf_real_redzone_calloc().  The size is read back from the 8-byte
 * header in front of p and the two '!' trailer markers are checked;
 * if either is missing, rf_redzone_free_failed() is called with the
 * caller's line and file.  The underlying block is then handed to
 * free().
 */
void rf_real_redzone_free(p, line, filen)
	char *p;
	int line;
	char *filen;
{
	unsigned long nbytes;
	char *base;

	rf_validate_mh_table();
	base = p - 8;
	nbytes = *((long *) base);
	if (!(base[nbytes + 8] == '!' && base[nbytes + 15] == '!'))
		rf_redzone_free_failed(base, nbytes, line, filen);
	free(base);
}
|
||||
|
||||
unsigned long rf_mem_alloc = 0; /* running byte total, adjusted by the rf_real_* routines below only while rf_memAmtDebug is set */
|
||||
|
||||
char *rf_real_Malloc(size, line, file)
|
||||
int size;
|
||||
int line;
|
||||
char *file;
|
||||
{
|
||||
void *pp;
|
||||
char *p;
|
||||
int tid;
|
||||
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
rf_redzone_malloc(pp, size);
|
||||
p = pp;
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size,
|
||||
line, file);
|
||||
}
|
||||
if (rf_memAmtDebug) {
|
||||
rf_mem_alloc += size;
|
||||
printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
|
||||
}
|
||||
#if RF_UTILITY == 0
|
||||
if (rf_memDebug > 1) {
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] malloc 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p+size, size,
|
||||
file, line);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
if (rf_memDebug)
|
||||
rf_record_malloc(p, size, line, file);
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
return(p);
|
||||
}
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
char *rf_real_MallocAndAdd(size, alist, line, file)
|
||||
int size;
|
||||
RF_AllocListElem_t *alist;
|
||||
int line;
|
||||
char *file;
|
||||
{
|
||||
void *pp;
|
||||
char *p;
|
||||
int tid;
|
||||
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
rf_redzone_malloc(pp, size);
|
||||
p = pp;
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size,
|
||||
line, file);
|
||||
}
|
||||
if (rf_memAmtDebug) {
|
||||
rf_mem_alloc += size;
|
||||
printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
|
||||
}
|
||||
if (rf_memDebug > 1) {
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] malloc+add 0x%lx - 0x%lx (%d) %s %d\n", tid, p, p+size,
|
||||
size, file, line);
|
||||
}
|
||||
if (alist) {
|
||||
rf_real_AddToAllocList(alist, pp, size, 0);
|
||||
}
|
||||
if (rf_memDebug)
|
||||
rf_record_malloc(p, size, line, file);
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
return(p);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
char *rf_real_Calloc(nel, elsz, line, file)
|
||||
int nel;
|
||||
int elsz;
|
||||
int line;
|
||||
char *file;
|
||||
{
|
||||
int tid, size;
|
||||
void *pp;
|
||||
char *p;
|
||||
|
||||
size = nel * elsz;
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
rf_redzone_calloc(pp, nel, elsz);
|
||||
p = pp;
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG4("Unable to calloc %d objects of size %d at line %d file %s\n",
|
||||
nel, elsz, line, file);
|
||||
return(NULL);
|
||||
}
|
||||
if (rf_memAmtDebug) {
|
||||
rf_mem_alloc += size;
|
||||
printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
|
||||
}
|
||||
#if RF_UTILITY == 0
|
||||
if (rf_memDebug > 1) {
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] calloc 0x%lx - 0x%lx (%d,%d) %s %d\n", tid, p, p+size, nel,
|
||||
elsz, file, line);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
if (rf_memDebug) {
|
||||
rf_record_malloc(p, size, line, file);
|
||||
}
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
return(p);
|
||||
}
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
char *rf_real_CallocAndAdd(nel, elsz, alist, line, file)
|
||||
int nel;
|
||||
int elsz;
|
||||
RF_AllocListElem_t *alist;
|
||||
int line;
|
||||
char *file;
|
||||
{
|
||||
int tid, size;
|
||||
void *pp;
|
||||
char *p;
|
||||
|
||||
size = nel * elsz;
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
rf_redzone_calloc(pp, nel, elsz);
|
||||
p = pp;
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG4("Unable to calloc %d objs of size %d at line %d file %s\n",
|
||||
nel, elsz, line, file);
|
||||
return(NULL);
|
||||
}
|
||||
if (rf_memAmtDebug) {
|
||||
rf_mem_alloc += size;
|
||||
printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
|
||||
}
|
||||
if (rf_memDebug > 1) {
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] calloc+add 0x%lx - 0x%lx (%d,%d) %s %d\n", tid, p,
|
||||
p+size, nel, elsz, file, line);
|
||||
}
|
||||
if (alist) {
|
||||
rf_real_AddToAllocList(alist, pp, size, 0);
|
||||
}
|
||||
if (rf_memDebug)
|
||||
rf_record_malloc(p, size, line, file);
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
return(p);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
void rf_real_Free(p, sz, line, file)
|
||||
void *p;
|
||||
int sz;
|
||||
int line;
|
||||
char *file;
|
||||
{
|
||||
int tid;
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
if (rf_memDebug > 1) {
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] free 0x%lx - 0x%lx (%d) %s %d\n", tid, p, ((char *)p)+sz, sz,
|
||||
file, line);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
RF_LOCK_MUTEX(rf_debug_mem_mutex);
|
||||
if (rf_memAmtDebug) {
|
||||
rf_mem_alloc -= sz;
|
||||
printf("%lu - size %d %s:%d\n", rf_mem_alloc, sz, file, line);
|
||||
}
|
||||
if (rf_memDebug) {
|
||||
rf_unrecord_malloc(p,sz);
|
||||
}
|
||||
rf_redzone_free(p);
|
||||
RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
|
||||
}
|
||||
|
||||
void rf_validate_mh_table()
|
||||
{
|
||||
int i, size;
|
||||
struct mh_struct *p;
|
||||
char *cp;
|
||||
|
||||
return;
|
||||
for (i=0; i<RF_MH_TABLESIZE; i++) {
|
||||
for (p=mh_table[i]; p; p=p->next) if (p->allocated) {
|
||||
cp = ((char *) p->address) - 8;
|
||||
size = *((long *) cp);
|
||||
if ((((char *) cp)[(size)+8] != '!') || (((char *) cp)[(size)+15] != '!')) {
|
||||
rf_redzone_free_failed(cp,(size),__LINE__,__FILE__);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Report a damaged redzone: print the block address, the size read
 * from its header, and the line/file supplied by the caller, then
 * trip RF_ASSERT(0).
 */
static void rf_redzone_free_failed(ptr,size,line,file)
	void *ptr;
	int size;
	int line;
	char *file;
{
	/* %lx takes an unsigned long, so convert the pointer */
	RF_ERRORMSG4("Free of 0x%lx (recorded size %d) at %d of %s detected redzone overrun\n",(unsigned long) ptr,size,line,file);
	RF_ASSERT(0);
}
|
||||
|
||||
#endif /* !KERNEL */
|
||||
|
||||
void rf_record_malloc(p, size, line, filen)
|
||||
void *p;
|
||||
int size, line;
|
||||
char *filen;
|
||||
{
|
||||
RF_ASSERT(size != 0);
|
||||
|
||||
/*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/
|
||||
memory_hash_insert(p, size, line, filen);
|
||||
tot_mem_in_use += size;
|
||||
/*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/
|
||||
if ( (long) p == rf_memDebugAddress) {
|
||||
printf("Allocate: debug address allocated from line %d file %s\n",line,filen);
|
||||
}
|
||||
}
|
||||
|
||||
void rf_unrecord_malloc(p, sz)
|
||||
void *p;
|
||||
int sz;
|
||||
{
|
||||
int size;
|
||||
|
||||
/*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/
|
||||
size = memory_hash_remove(p, sz);
|
||||
tot_mem_in_use -= size;
|
||||
/*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/
|
||||
if ( (long) p == rf_memDebugAddress) {
|
||||
printf("Free: Found debug address\n"); /* this is really only a flag line for gdb */
|
||||
}
|
||||
}
|
||||
|
||||
void rf_print_unfreed()
|
||||
{
|
||||
int i, foundone=0;
|
||||
struct mh_struct *p;
|
||||
|
||||
for (i=0; i<RF_MH_TABLESIZE; i++) {
|
||||
for (p=mh_table[i]; p; p=p->next) if (p->allocated) {
|
||||
if (!foundone) printf("\n\nThere are unfreed memory locations at program shutdown:\n");
|
||||
foundone = 1;
|
||||
printf("Addr 0x%lx Size %d line %d file %s\n",
|
||||
(long)p->address,p->size,p->line,p->filen);
|
||||
}
|
||||
}
|
||||
if (tot_mem_in_use) {
|
||||
printf("%ld total bytes in use\n", tot_mem_in_use);
|
||||
}
|
||||
}
|
||||
|
||||
int rf_ConfigureDebugMem(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
if (rf_memDebug) {
|
||||
for (i=0; i<RF_MH_TABLESIZE; i++)
|
||||
mh_table[i] = NULL;
|
||||
mh_table_initialized=1;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * Map an address to a bucket index in mh_table: drop the low three
 * bits, then reduce modulo the table size.  The argument is
 * parenthesized so that an expression argument is cast as a whole.
 */
#define HASHADDR(_a_) ( (((unsigned long) (_a_))>>3) % RF_MH_TABLESIZE )
|
||||
|
||||
static void memory_hash_insert(addr, size, line, filen)
|
||||
void *addr;
|
||||
int size, line;
|
||||
char *filen;
|
||||
{
|
||||
unsigned long bucket = HASHADDR(addr);
|
||||
struct mh_struct *p;
|
||||
|
||||
RF_ASSERT(mh_table_initialized);
|
||||
|
||||
/* search for this address in the hash table */
|
||||
for (p=mh_table[bucket]; p && (p->address != addr); p=p->next);
|
||||
if (!p) {
|
||||
#ifdef KERNEL
|
||||
RF_Malloc(p,sizeof(struct mh_struct),(struct mh_struct *));
|
||||
#else /* KERNEL */
|
||||
p = (struct mh_struct *)malloc(sizeof(struct mh_struct));
|
||||
#endif /* KERNEL */
|
||||
RF_ASSERT(p);
|
||||
p->next = mh_table[bucket];
|
||||
mh_table[bucket] = p;
|
||||
p->address = addr;
|
||||
p->allocated = 0;
|
||||
}
|
||||
if (p->allocated) {
|
||||
printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n",(long) addr, line, filen);
|
||||
printf(" last allocated from line %d file %s\n",p->line, p->filen);
|
||||
RF_ASSERT(0);
|
||||
}
|
||||
p->size = size; p->line = line; p->filen = filen;
|
||||
p->allocated = 1;
|
||||
}
|
||||
|
||||
static int memory_hash_remove(addr, sz)
|
||||
void *addr;
|
||||
int sz;
|
||||
{
|
||||
unsigned long bucket = HASHADDR(addr);
|
||||
struct mh_struct *p;
|
||||
|
||||
RF_ASSERT(mh_table_initialized);
|
||||
for (p=mh_table[bucket]; p && (p->address != addr); p=p->next);
|
||||
if (!p) {
|
||||
printf("ERROR: freeing never-allocated address 0x%lx\n",(long) addr);
|
||||
RF_PANIC();
|
||||
}
|
||||
if (!p->allocated) {
|
||||
printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n",(long) addr, p->line, p->filen);
|
||||
RF_PANIC();
|
||||
}
|
||||
if (sz > 0 && p->size != sz) { /* you can suppress this error by using a negative value as the size to free */
|
||||
printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n",(unsigned long) addr, sz, p->size,p->line, p->filen);
|
||||
RF_PANIC();
|
||||
}
|
||||
p->allocated = 0;
|
||||
return(p->size);
|
||||
}
|
||||
|
||||
void rf_ReportMaxMem()
|
||||
{
|
||||
printf("Max memory used: %d bytes\n",(int)max_mem);
|
||||
#ifndef KERNEL
|
||||
fflush(stdout);
|
||||
fprintf(stderr,"Max memory used: %d bytes\n",max_mem);
|
||||
fflush(stderr);
|
||||
#endif /* !KERNEL */
|
||||
}
|
|
@ -0,0 +1,262 @@
|
|||
/* $NetBSD: rf_debugMem.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Daniel Stodolsky, Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_debugMem.h -- memory leak debugging module
|
||||
*
|
||||
* IMPORTANT: if you put the lock/unlock mutex stuff back in here, you
|
||||
* need to take it out of the routines in debugMem.c
|
||||
*
|
||||
* Log: rf_debugMem.h,v
|
||||
* Revision 1.27 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.26 1996/06/11 13:46:43 jimz
|
||||
* make bracing consistent around memory allocation macros
|
||||
*
|
||||
* Revision 1.25 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.24 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.23 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.22 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.21 1996/05/23 22:17:40 jimz
|
||||
* fix alloclist macro names for kernel
|
||||
*
|
||||
* Revision 1.20 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.19 1996/05/23 13:18:23 jimz
|
||||
* include rf_options.h
|
||||
*
|
||||
* Revision 1.18 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.17 1996/05/21 18:51:54 jimz
|
||||
* cleaned up macro args
|
||||
*
|
||||
* Revision 1.16 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.15 1996/05/01 16:26:22 jimz
|
||||
* get rid of old ccmn stuff
|
||||
*
|
||||
* Revision 1.14 1995/12/01 15:58:09 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.13 1995/10/11 15:26:03 jimz
|
||||
* zero memory after allocation in kernel (hide effects
|
||||
* of uninitialized structs)
|
||||
*
|
||||
* Revision 1.12 1995/10/06 17:04:15 jimz
|
||||
* make Malloc and Free in kernel use kernel malloc package, not cam
|
||||
* dbufs (which is gross, and was exhausting cam zalloc limit)
|
||||
*
|
||||
* Revision 1.11 1995/05/01 13:28:00 holland
|
||||
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
|
||||
*
|
||||
* Revision 1.10 1995/04/24 13:25:51 holland
|
||||
* rewrite to move disk queues, recon, & atomic RMW to kernel
|
||||
*
|
||||
* Revision 1.9 1995/02/17 19:39:56 holland
|
||||
* added size param to all calls to Free().
|
||||
* this is ignored at user level, but necessary in the kernel.
|
||||
*
|
||||
* Revision 1.8 1995/02/10 17:34:10 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.7 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.6 1995/02/01 15:13:05 holland
|
||||
* moved #include of general.h out of raid.h and into each file
|
||||
*
|
||||
* Revision 1.5 1995/02/01 14:25:19 holland
|
||||
* began changes for kernelization:
|
||||
* changed all instances of mutex_t and cond_t to DECLARE macros
|
||||
* converted configuration code to use config structure
|
||||
*
|
||||
* Revision 1.4 1995/01/11 19:27:02 holland
|
||||
* many changes related to performance tuning
|
||||
*
|
||||
* Revision 1.3 1994/11/29 21:34:56 danner
|
||||
* Changed type of redzone_calloc and malloc to void *.
|
||||
*
|
||||
* Revision 1.2 1994/11/28 22:13:23 danner
|
||||
* Many macros converted to functions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DEBUGMEM_H_
|
||||
#define _RF__RF_DEBUGMEM_H_
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_options.h"
|
||||
|
||||
#ifndef KERNEL
|
||||
|
||||
#ifndef __NetBSD__
|
||||
void *malloc(), *calloc();
|
||||
#endif
|
||||
RF_DECLARE_EXTERN_MUTEX(rf_debug_mem_mutex)
|
||||
|
||||
/*
|
||||
* redzone malloc, calloc, and free allocate an extra 16 bytes on each
|
||||
* malloc/calloc call to allow tracking of overflows on free.
|
||||
*/
|
||||
|
||||
/*
 * rf_redzone_{malloc,calloc,free}: when RF_MEMORY_REDZONES is greater than
 * zero these forward to the rf_real_redzone_* routines (the free variant also
 * passes the __LINE__ and __FILE__ of the call site); otherwise they expand
 * directly to malloc, calloc and free.  The allocating forms assign the
 * result to their first argument.
 */
#if RF_MEMORY_REDZONES > 0

#define rf_redzone_malloc(_p_,_size_)      _p_ = rf_real_redzone_malloc(_size_)
#define rf_redzone_calloc(_p_,_n_,_size_)  _p_ = rf_real_redzone_calloc(_n_,_size_)
#define rf_redzone_free(_p_)               rf_real_redzone_free(_p_, __LINE__, __FILE__)

#else /* RF_MEMORY_REDZONES > 0 */

#define rf_redzone_malloc(_p_,_size_)      _p_ = malloc(_size_)
#define rf_redzone_calloc(_p_,_n_,_size_)  _p_ = calloc(_n_,_size_)
#define rf_redzone_free(_p_)               free(_p_)

#endif /* RF_MEMORY_REDZONES > 0 */
|
||||
|
||||
/*
 * User-level allocation macros.  Each one forwards to the rf_real_* routine
 * of the same name, appending the __LINE__ and __FILE__ of the call site.
 * The allocating forms apply _cast_ to the returned pointer and store it in
 * _p_; the *AndAdd forms additionally pass _alist_ through to the routine.
 */
#define RF_Malloc(_p_, _size_, _cast_) \
  { \
    _p_ = _cast_ rf_real_Malloc(_size_, __LINE__, __FILE__); \
  }

#define RF_MallocAndAdd(_p_, _size_, _cast_, _alist_) \
  { \
    _p_ = _cast_ rf_real_MallocAndAdd(_size_, _alist_, __LINE__, __FILE__); \
  }

#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \
  { \
    _p_ = _cast_ rf_real_Calloc(_nel_, _elsz_, __LINE__, __FILE__); \
  }

#define RF_CallocAndAdd(_p_, _nel_, _elsz_, _cast_, _alist_) \
  { \
    _p_ = _cast_ rf_real_CallocAndAdd(_nel_, _elsz_, _alist_, __LINE__, __FILE__); \
  }

/* release __p_; _sz_ is handed to rf_real_Free along with the call site */
#define RF_Free(__p_, _sz_) \
  { \
    rf_real_Free(__p_, _sz_, __LINE__, __FILE__); \
  }
|
||||
|
||||
#else /* KERNEL */
|
||||
|
||||
#include <sys/types.h>
|
||||
#ifdef __NetBSD__
|
||||
typedef u_int32_t U32;
|
||||
#else
|
||||
#include <io/common/iotypes.h> /* just to get defn of U32 */
|
||||
#endif /* __NetBSD__ */
|
||||
#include <sys/malloc.h>
|
||||
|
||||
|
||||
/*
 * Kernel RF_Malloc.  The two platforms differ only in the argument list of
 * the kernel malloc() call, so that call is isolated in RF_KMALLOC_RAW_ and
 * the rest of the macro is written once.
 *
 * RF_Malloc stores the (cast) allocation in _p_, zeroes _size_ bytes of it
 * with bzero, and, when rf_memDebug is set, reports the allocation to
 * rf_record_malloc together with the __LINE__ and __FILE__ of the call site.
 *
 * _size_ is intentionally substituted without extra parentheses so that the
 * expansion is token-for-token what it has always been.
 */
#ifdef __NetBSD__
#define RF_KMALLOC_RAW_(_size_) malloc((u_long)_size_, M_DEVBUF, M_WAITOK)
#else
#define RF_KMALLOC_RAW_(_size_) malloc((u_long)_size_, BUCKETINDEX(_size_), M_DEVBUF, M_WAITOK)
#endif /* __NetBSD__ */

#define RF_Malloc(_p_, _size_, _cast_) \
  { \
    _p_ = _cast_ RF_KMALLOC_RAW_(_size_); \
    bzero((char *)_p_, _size_); \
    if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \
  }
|
||||
|
||||
/*
 * RF_MallocAndAdd: RF_Malloc, then register the allocation on __alist_
 * (when __alist_ is non-null) through rf_AddToAllocList.
 */
#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \
  { \
    RF_Malloc(__p_, __size_, __cast_); \
    if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \
  }

/*
 * RF_Calloc: allocate room for _nel_ elements of _elsz_ bytes each.
 * The kernel RF_Malloc already zeroes the whole allocation with bzero, so no
 * second pass over the memory is made here.
 */
#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \
  { \
    RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \
  }

/*
 * RF_CallocAndAdd: RF_Calloc, then register the allocation on __alist
 * (when __alist is non-null) through rf_AddToAllocList.
 */
#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \
  { \
    RF_Calloc(__p, __nel, __elsz, __cast); \
    if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \
  }

/*
 * RF_Free: release _p_ back to the kernel allocator.
 * When rf_memDebug is set the address is un-recorded BEFORE the memory is
 * released: the pointer value is then never used after free(), and the
 * address cannot be handed to (and recorded by) another allocation while
 * its old record is still present.
 */
#define RF_Free(_p_, _sz_) \
  { \
    if (rf_memDebug) rf_unrecord_malloc(_p_, (U32) (_sz_)); \
    free((void *)(_p_), M_DEVBUF); \
  }
|
||||
|
||||
#endif /* KERNEL */
|
||||
|
||||
#ifndef KERNEL
|
||||
void *rf_real_redzone_malloc(int size);
|
||||
void *rf_real_redzone_calloc(int n, int size);
|
||||
void rf_real_redzone_free(char *p, int line, char *filen);
|
||||
char *rf_real_Malloc(int size, int line, char *file);
|
||||
char *rf_real_Calloc(int nel, int elsz, int line, char *file);
|
||||
void rf_real_Free(void *p, int sz, int line, char *file);
|
||||
void rf_validate_mh_table(void);
|
||||
#if RF_UTILITY == 0
|
||||
char *rf_real_MallocAndAdd(int size, RF_AllocListElem_t *alist, int line, char *file);
|
||||
char *rf_real_CallocAndAdd(int nel, int elsz, RF_AllocListElem_t *alist, int line, char *file);
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
#endif /* !KERNEL */
|
||||
|
||||
void rf_record_malloc(void *p, int size, int line, char *filen);
|
||||
void rf_unrecord_malloc(void *p, int sz);
|
||||
void rf_print_unfreed(void);
|
||||
int rf_ConfigureDebugMem(RF_ShutdownList_t **listp);
|
||||
void rf_ReportMaxMem(void);
|
||||
|
||||
#endif /* !_RF__RF_DEBUGMEM_H_ */
|
|
@ -0,0 +1,185 @@
|
|||
/* $NetBSD: rf_debugprint.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Code to do debug printfs. Calls to rf_debug_printf cause the corresponding
|
||||
* information to be printed to a circular buffer rather than the screen.
|
||||
* The point is to try and minimize the timing variations induced by the
|
||||
* printfs, and to capture only the printf's immediately preceding a failure.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_debugprint.c,v
|
||||
* Revision 1.13 1996/08/07 21:08:31 jimz
|
||||
* remove bogus ; from mutex decl
|
||||
*
|
||||
* Revision 1.12 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.11 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.10 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.9 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.8 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.7 1996/05/20 16:16:06 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.6 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.5 1995/12/01 16:00:45 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_debugprint.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_options.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
/*
 * One buffered rf_debug_printf call: the format string pointer and the eight
 * argument values, stored exactly as they were passed in.
 */
struct RF_Entry_s {
  char *cstring;
  void *a1;
  void *a2;
  void *a3;
  void *a4;
  void *a5;
  void *a6;
  void *a7;
  void *a8;
};

/*
 * The buffer holds 2^BUFSHIFT (1k) entries.  Because the size is a power of
 * two, indices are wrapped with "& BUFMASK".
 */
#define BUFSHIFT 10
#define BUFSIZE  (1 << BUFSHIFT)
#define BUFMASK  (BUFSIZE - 1)

/* the circular buffer, and the slot the next rf_debug_printf call will use */
static struct RF_Entry_s rf_debugprint_buf[BUFSIZE];
static int rf_debugprint_index = 0;
|
||||
RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex)
|
||||
|
||||
int rf_ConfigureDebugPrint(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
rf_clear_debug_print_buffer();
|
||||
return(0);
|
||||
}
|
||||
|
||||
void rf_clear_debug_print_buffer()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<BUFSIZE; i++)
|
||||
rf_debugprint_buf[i].cstring = NULL;
|
||||
rf_debugprint_index = 0;
|
||||
}
|
||||
|
||||
void rf_debug_printf(s,a1,a2,a3,a4,a5,a6,a7,a8)
|
||||
char *s;
|
||||
void *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8;
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (rf_debugPrintUseBuffer) {
|
||||
|
||||
RF_LOCK_MUTEX(rf_debug_print_mutex);
|
||||
idx = rf_debugprint_index;
|
||||
rf_debugprint_index = (rf_debugprint_index+1) & BUFMASK;
|
||||
RF_UNLOCK_MUTEX(rf_debug_print_mutex);
|
||||
|
||||
rf_debugprint_buf[idx].cstring = s;
|
||||
rf_debugprint_buf[idx].a1 = a1;
|
||||
rf_debugprint_buf[idx].a2 = a2;
|
||||
rf_debugprint_buf[idx].a3 = a3;
|
||||
rf_debugprint_buf[idx].a4 = a4;
|
||||
rf_debugprint_buf[idx].a5 = a5;
|
||||
rf_debugprint_buf[idx].a6 = a6;
|
||||
rf_debugprint_buf[idx].a7 = a7;
|
||||
rf_debugprint_buf[idx].a8 = a8;
|
||||
}
|
||||
else {
|
||||
printf(s,a1,a2,a3,a4,a5,a6,a7,a8);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * rf_print_debug_buffer -- print the buffered debug output without naming a
 * file; this is rf_spill_debug_buffer called with a NULL file name.
 */
void rf_print_debug_buffer()
{
  char *no_file_name = NULL;

  rf_spill_debug_buffer(no_file_name);
}
|
||||
|
||||
void rf_spill_debug_buffer(fname)
|
||||
char *fname;
|
||||
{
|
||||
int i;
|
||||
#ifndef KERNEL
|
||||
FILE *fp;
|
||||
#endif /* !KERNEL */
|
||||
|
||||
if (!rf_debugPrintUseBuffer)
|
||||
return;
|
||||
|
||||
RF_LOCK_MUTEX(rf_debug_print_mutex);
|
||||
#ifndef KERNEL
|
||||
fp = (fname) ? fopen(fname,"w") : stdout;
|
||||
if (!fp) {printf("Unable to open file %s for writing\n",fname); return;}
|
||||
for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring)
|
||||
fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
|
||||
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
|
||||
fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
|
||||
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
|
||||
fclose(fp);
|
||||
#else /* !KERNEL */
|
||||
for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring)
|
||||
printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
|
||||
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
|
||||
printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
|
||||
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
|
||||
#endif /* !KERNEL */
|
||||
RF_UNLOCK_MUTEX(rf_debug_print_mutex);
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/* $NetBSD: rf_debugprint.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* rf_debugprint.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_debugprint.h,v
|
||||
* Revision 1.4 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.3 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:55:43 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DEBUGPRINT_H_
|
||||
#define _RF__RF_DEBUGPRINT_H_
|
||||
|
||||
/* module setup; returns 0 or the error code from creating the module mutex */
int  rf_ConfigureDebugPrint(RF_ShutdownList_t **listp);

/* mark every buffer slot empty and reset the write index */
void rf_clear_debug_print_buffer(void);

/* printf-like: format string plus eight argument values passed as pointers */
void rf_debug_printf(char *s,
                     void *a1, void *a2, void *a3, void *a4,
                     void *a5, void *a6, void *a7, void *a8);

/* print the buffered messages; rf_spill_debug_buffer takes a file name */
void rf_print_debug_buffer(void);
void rf_spill_debug_buffer(char *fname);
|
||||
|
||||
#endif /* !_RF__RF_DEBUGPRINT_H_ */
|
|
@ -0,0 +1,846 @@
|
|||
/* $NetBSD: rf_decluster.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
*
|
||||
* rf_decluster.c -- code related to the declustered layout
|
||||
*
|
||||
* Created 10-21-92 (MCH)
|
||||
*
|
||||
* Nov 93: adding support for distributed sparing. This code is a little
|
||||
* complex: the basic layout used is as follows:
|
||||
* let F = (v-1)/GCD(r,v-1). The spare space for each set of
|
||||
* F consecutive fulltables is grouped together and placed after
|
||||
* that set of tables.
|
||||
* +------------------------------+
|
||||
* | F fulltables |
|
||||
* | Spare Space |
|
||||
* | F fulltables |
|
||||
* | Spare Space |
|
||||
* | ... |
|
||||
* +------------------------------+
|
||||
*
|
||||
*--------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_decluster.c,v
|
||||
* Revision 1.51 1996/08/21 19:47:10 jimz
|
||||
* fix bogus return values from config
|
||||
*
|
||||
* Revision 1.50 1996/08/20 22:41:42 jimz
|
||||
* better diagnostics for bad blockdesigns
|
||||
*
|
||||
* Revision 1.49 1996/07/31 16:56:18 jimz
|
||||
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
|
||||
*
|
||||
* Revision 1.48 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.47 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.46 1996/07/27 18:40:11 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.45 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.44 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.43 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.42 1996/06/17 03:23:48 jimz
|
||||
* switch DeclusteredDS typing
|
||||
*
|
||||
* Revision 1.41 1996/06/11 08:55:15 jimz
|
||||
* improved error-checking at configuration time
|
||||
*
|
||||
* Revision 1.40 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.39 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.38 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.37 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.36 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.35 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.34 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.33 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.32 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.31 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.30 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.29 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.28 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.27 1995/12/01 16:00:08 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.26 1995/11/28 21:35:12 amiri
|
||||
* set the RF_BD_DECLUSTERED flag
|
||||
*
|
||||
* Revision 1.25 1995/11/17 18:56:00 wvcii
|
||||
* added prototyping to MapParity
|
||||
*
|
||||
* Revision 1.24 1995/07/04 22:25:33 holland
|
||||
* increased default num bufs
|
||||
*
|
||||
* Revision 1.23 1995/07/03 20:23:51 holland
|
||||
* changed floating recon bufs & head sep yet again
|
||||
*
|
||||
* Revision 1.22 1995/07/03 18:12:14 holland
|
||||
* changed the way the number of floating recon bufs & the head sep
|
||||
* limit are set
|
||||
*
|
||||
* Revision 1.21 1995/07/02 15:07:42 holland
|
||||
* bug fixes related to getting distributed sparing numbers
|
||||
*
|
||||
* Revision 1.20 1995/06/23 13:41:28 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_raidframe.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_decluster.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
extern int rf_copyback_in_progress; /* debug only */
|
||||
|
||||
/* found in rf_kintf.c */
|
||||
int rf_GetSpareTableFromDaemon(RF_SparetWait_t *req);
|
||||
|
||||
/* configuration code */
|
||||
|
||||
int rf_ConfigureDeclustered(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
int b, v, k, r, lambda; /* block design params */
|
||||
int i, j;
|
||||
RF_RowCol_t *first_avail_slot;
|
||||
RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
|
||||
RF_DeclusteredConfigInfo_t *info;
|
||||
RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
|
||||
RF_StripeCount_t totSparePUsPerDisk;
|
||||
RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
|
||||
RF_SectorCount_t SpareSpaceInSUs;
|
||||
char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
|
||||
RF_StripeNum_t l, SUID;
|
||||
|
||||
SUID = l = 0;
|
||||
numCompleteSpareRegionsPerDisk = 0;
|
||||
|
||||
/* 1. create layout specific structure */
|
||||
RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
|
||||
if (info == NULL)
|
||||
return(ENOMEM);
|
||||
layoutPtr->layoutSpecificInfo = (void *) info;
|
||||
info->SpareTable = NULL;
|
||||
|
||||
/* 2. extract parameters from the config structure */
|
||||
if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
(void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
|
||||
}
|
||||
cfgBuf += RF_SPAREMAP_NAME_LEN;
|
||||
|
||||
b = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
v = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
k = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
r = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
lambda = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
raidPtr->noRotate = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
|
||||
/* the sparemaps are generated assuming that parity is rotated, so we issue
|
||||
* a warning if both distributed sparing and no-rotate are on at the same time
|
||||
*/
|
||||
if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
|
||||
RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n");
|
||||
}
|
||||
|
||||
if (raidPtr->numCol != v) {
|
||||
RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
|
||||
return(EINVAL);
|
||||
}
|
||||
|
||||
/* 3. set up the values used in the mapping code */
|
||||
info->BlocksPerTable = b;
|
||||
info->Lambda = lambda;
|
||||
info->NumParityReps = info->groupSize = k;
|
||||
info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */
|
||||
info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */
|
||||
info->PUsPerBlock = k-1;
|
||||
info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
|
||||
info->TableDepthInPUs = (b*k) / v;
|
||||
info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */
|
||||
|
||||
/* used only in distributed sparing case */
|
||||
info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1); /* (v-1)/gcd fulltables */
|
||||
info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
|
||||
info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;
|
||||
|
||||
/* check to make sure the block design is sufficiently small */
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
|
||||
RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
|
||||
(int)info->FullTableDepthInPUs,
|
||||
(int)info->SpareSpaceDepthPerRegionInSUs,
|
||||
(int)layoutPtr->stripeUnitsPerDisk);
|
||||
return(EINVAL);
|
||||
}
|
||||
} else {
|
||||
if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
|
||||
RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
|
||||
(int)(info->TableDepthInPUs * layoutPtr->SUsPerPU), \
|
||||
(int)layoutPtr->stripeUnitsPerDisk);
|
||||
return(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* compute the size of each disk, and the number of tables in the last fulltable (which
|
||||
* need not be complete)
|
||||
*/
|
||||
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
|
||||
PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
|
||||
spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
|
||||
(info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
|
||||
info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
|
||||
|
||||
numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
|
||||
info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
|
||||
extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
|
||||
|
||||
/* assume conservatively that we need the full amount of spare space in one region in order
|
||||
* to provide spares for the partial spare region at the end of the array. We set "i" to
|
||||
* the number of tables in the partial spare region. This may actually include some fulltables.
|
||||
*/
|
||||
extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
|
||||
if (extraPUsPerDisk <= 0) i = 0;
|
||||
else i = extraPUsPerDisk/info->TableDepthInPUs;
|
||||
|
||||
complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
|
||||
info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
|
||||
info->ExtraTablesPerDisk = i % k;
|
||||
|
||||
/* note that in the last spare region, the spare space is complete even though data/parity space is not */
|
||||
totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
|
||||
info->TotSparePUsPerDisk = totSparePUsPerDisk;
|
||||
|
||||
layoutPtr->stripeUnitsPerDisk =
|
||||
((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */
|
||||
info->ExtraTablesPerDisk * info->TableDepthInPUs +
|
||||
totSparePUsPerDisk /* spare space */
|
||||
) * layoutPtr->SUsPerPU;
|
||||
layoutPtr->dataStripeUnitsPerDisk =
|
||||
(complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
|
||||
* layoutPtr->SUsPerPU * (k-1) / k;
|
||||
|
||||
} else {
|
||||
/* non-dist spare case: force each disk to contain an integral number of tables */
|
||||
layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
|
||||
layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
|
||||
|
||||
/* compute the number of tables in the last fulltable, which need not be complete */
|
||||
complete_FT_count =
|
||||
((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
|
||||
|
||||
info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
|
||||
info->ExtraTablesPerDisk =
|
||||
((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
|
||||
}
|
||||
|
||||
raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
/* find the disk offset of the stripe unit where the last fulltable starts */
|
||||
numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
|
||||
diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
|
||||
diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
|
||||
info->DiskOffsetOfLastSpareSpaceChunkInSUs =
|
||||
diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
}
|
||||
info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
|
||||
info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
|
||||
|
||||
/* 4. create and initialize the lookup tables */
|
||||
info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
|
||||
if (info->LayoutTable == NULL)
|
||||
return(ENOMEM);
|
||||
info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
|
||||
if (info->OffsetTable == NULL)
|
||||
return(ENOMEM);
|
||||
info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
|
||||
if (info->BlockTable == NULL)
|
||||
return(ENOMEM);
|
||||
|
||||
first_avail_slot = rf_make_1d_array(v, NULL);
|
||||
if (first_avail_slot == NULL)
|
||||
return(ENOMEM);
|
||||
|
||||
for (i=0; i<b; i++)
|
||||
for (j=0; j<k; j++)
|
||||
info->LayoutTable[i][j] = *cfgBuf++;
|
||||
|
||||
/* initialize offset table */
|
||||
for (i=0; i<b; i++) for (j=0; j<k; j++) {
|
||||
info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ];
|
||||
first_avail_slot[ info->LayoutTable[i][j] ]++;
|
||||
}
|
||||
|
||||
/* initialize block table */
|
||||
for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
|
||||
for (i=0; i<b; i++) {
|
||||
for (j=0; j<k; j++) {
|
||||
info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
|
||||
[ info->LayoutTable[i][j] ] = SUID;
|
||||
}
|
||||
SUID++;
|
||||
}
|
||||
}
|
||||
|
||||
rf_free_1d_array(first_avail_slot, v);
|
||||
|
||||
/* 5. set up the remaining redundant-but-useful parameters */
|
||||
|
||||
raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
|
||||
info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1);
|
||||
|
||||
/* strange evaluation order below to try and minimize overflow problems */
|
||||
|
||||
layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
|
||||
layoutPtr->numDataCol = k-1;
|
||||
layoutPtr->numParityCol = 1;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* declustering with distributed sparing */
|
||||
static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
|
||||
static void rf_ShutdownDeclusteredDS(arg)
|
||||
RF_ThreadArg_t arg;
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info;
|
||||
RF_Raid_t *raidPtr;
|
||||
|
||||
raidPtr = (RF_Raid_t *)arg;
|
||||
info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
if (info->SpareTable)
|
||||
rf_FreeSpareTable(raidPtr);
|
||||
}
|
||||
|
||||
int rf_ConfigureDeclusteredDS(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
|
||||
if (rc)
|
||||
return(rc);
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
|
||||
if (rc) {
|
||||
RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc);
|
||||
rf_ShutdownDeclusteredDS(raidPtr);
|
||||
return(rc);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RaidAddr_t raidSector;
|
||||
RF_RowCol_t *row;
|
||||
RF_RowCol_t *col;
|
||||
RF_SectorNum_t *diskSector;
|
||||
int remap;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
|
||||
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
|
||||
RF_StripeNum_t BlockID, BlockOffset, RepIndex;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
|
||||
FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */
|
||||
if (raidPtr->numRow == 1) *row = 0; /* avoid a mod and a div in the common case */
|
||||
else {
|
||||
*row = FullTableID % raidPtr->numRow;
|
||||
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
|
||||
}
|
||||
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
|
||||
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
|
||||
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
|
||||
}
|
||||
FullTableOffset = SUID % sus_per_fulltable;
|
||||
TableID = FullTableOffset / info->SUsPerTable;
|
||||
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
|
||||
BlockID = TableOffset / info->PUsPerBlock;
|
||||
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
|
||||
BlockID %= info->BlocksPerTable;
|
||||
RepIndex = info->PUsPerBlock - TableID;
|
||||
if (!raidPtr->noRotate) BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
|
||||
*col = info->LayoutTable[BlockID][BlockOffset];
|
||||
|
||||
/* remap to distributed spare space if indicated */
|
||||
if (remap) {
|
||||
RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
|
||||
(rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
|
||||
rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
|
||||
} else {
|
||||
|
||||
outSU = base_suid;
|
||||
outSU += FullTableID * fulltable_depth; /* offs to strt of FT */
|
||||
outSU += SpareSpace; /* skip rsvd spare space */
|
||||
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
|
||||
outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */
|
||||
}
|
||||
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within a PU */
|
||||
|
||||
/* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector. */
|
||||
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
|
||||
|
||||
RF_ASSERT( *col != -1 );
|
||||
}
|
||||
|
||||
|
||||
/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
|
||||
void rf_MapParityDeclustered(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
|
||||
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
|
||||
RF_StripeNum_t BlockID, BlockOffset, RepIndex;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
|
||||
/* compute row & (possibly) spare space exactly as before */
|
||||
FullTableID = SUID / sus_per_fulltable;
|
||||
if (raidPtr->numRow == 1) *row = 0; /* avoid a mod and a div in the common case */
|
||||
else {
|
||||
*row = FullTableID % raidPtr->numRow;
|
||||
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
|
||||
}
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
|
||||
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
|
||||
}
|
||||
|
||||
/* compute BlockID and RepIndex exactly as before */
|
||||
FullTableOffset = SUID % sus_per_fulltable;
|
||||
TableID = FullTableOffset / info->SUsPerTable;
|
||||
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
|
||||
/*TableOffset = FullTableOffset % info->SUsPerTable;*/
|
||||
/*BlockID = (TableOffset / info->PUsPerBlock) % info->BlocksPerTable;*/
|
||||
BlockID = TableOffset / info->PUsPerBlock;
|
||||
/*BlockOffset = TableOffset % info->PUsPerBlock;*/
|
||||
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
|
||||
BlockID %= info->BlocksPerTable;
|
||||
|
||||
/* the parity block is in the position indicated by RepIndex */
|
||||
RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
|
||||
*col = info->LayoutTable[BlockID][RepIndex];
|
||||
|
||||
if (remap) {
|
||||
RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
|
||||
(rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
|
||||
rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
|
||||
} else {
|
||||
|
||||
/* compute sector as before, except use RepIndex instead of BlockOffset */
|
||||
outSU = base_suid;
|
||||
outSU += FullTableID * fulltable_depth;
|
||||
outSU += SpareSpace; /* skip rsvd spare space */
|
||||
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
|
||||
}
|
||||
|
||||
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
|
||||
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
|
||||
|
||||
RF_ASSERT( *col != -1 );
|
||||
}
|
||||
|
||||
/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
|
||||
* the caller must _never_ attempt to modify this array.
|
||||
*/
|
||||
void rf_IdentifyStripeDeclustered(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids,
|
||||
RF_RowCol_t *outRow)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0;
|
||||
RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
|
||||
RF_StripeNum_t stripeID, FullTableID;
|
||||
int tableOffset;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */
|
||||
*outRow = FullTableID % raidPtr->numRow;
|
||||
stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset into array */
|
||||
tableOffset = (stripeID % info->BlocksPerTable); /* find offset into block design table */
|
||||
*diskids = info->LayoutTable[tableOffset];
|
||||
}
|
||||
|
||||
/* This returns the default head-separation limit, which is measured
|
||||
* in "required units for reconstruction". Each time a disk fetches
|
||||
* a unit, it bumps a counter. The head-sep code prohibits any disk
|
||||
* from getting more than headSepLimit counter values ahead of any
|
||||
* other.
|
||||
*
|
||||
* We assume here that the number of floating recon buffers is already
|
||||
* set. There are r stripes to be reconstructed in each table, and so
|
||||
* if we have a total of B buffers, we can have at most B/r tables
|
||||
* under recon at any one time. In each table, lambda units are required
|
||||
* from each disk, so given B buffers, the head sep limit has to be
|
||||
* (lambda*B)/r units. We subtract one to avoid weird boundary cases.
|
||||
*
|
||||
* for example, suppose we're given 50 buffers, r=19, and lambda=4 as in
|
||||
* the 20.5 design. There are 19 stripes/table to be reconstructed, so
|
||||
* we can have 50/19 tables concurrently under reconstruction, which means
|
||||
* we can allow the fastest disk to get 50/19 tables ahead of the slower
|
||||
* disk. There are lambda "required units" for each disk, so the fastest
|
||||
* disk can get 4*50/19 = 10 counter values ahead of the slowest.
|
||||
*
|
||||
* If numBufsToAccumulate is not 1, we need to limit the head sep further
|
||||
* because multiple bufs will be required for each stripe under recon.
|
||||
*/
|
||||
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(
|
||||
RF_Raid_t *raidPtr)
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
|
||||
return(info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate);
|
||||
}
|
||||
|
||||
/* returns the default number of recon buffers to use. The value
|
||||
* is somewhat arbitrary...it's intended to be large enough to allow
|
||||
* for a reasonably large head-sep limit, but small enough that you
|
||||
* don't use up all your system memory with buffers.
|
||||
*/
|
||||
int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
|
||||
{
|
||||
return(100 * rf_numBufsToAccumulate);
|
||||
}
|
||||
|
||||
/* sectors in the last fulltable of the array need to be handled
|
||||
* specially since this fulltable can be incomplete. this function
|
||||
* changes the values of certain params to handle this.
|
||||
*
|
||||
* the idea here is that MapSector et al. figure out which disk the
|
||||
* addressed unit lives on by computing the modulos of the unit number
|
||||
* with the number of units per fulltable, table, etc. In the last
|
||||
* fulltable, there are fewer units per fulltable, so we need to adjust
|
||||
* the number of user data units per fulltable to reflect this.
|
||||
*
|
||||
* so, we (1) convert the fulltable size and depth parameters to
|
||||
* the size of the partial fulltable at the end, (2) compute the
|
||||
* disk sector offset where this fulltable starts, and (3) convert
|
||||
* the users stripe unit number from an offset into the array to
|
||||
* an offset into the last fulltable.
|
||||
*/
|
||||
void rf_decluster_adjust_params(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t *SUID,
|
||||
RF_StripeCount_t *sus_per_fulltable,
|
||||
RF_StripeCount_t *fulltable_depth,
|
||||
RF_StripeNum_t *base_suid)
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* Nothing! */
|
||||
#else
|
||||
char pc = layoutPtr->map->parityConfig;
|
||||
#endif
|
||||
|
||||
if (*SUID >= info->FullTableLimitSUID) {
|
||||
/* new full table size is size of last full table on disk */
|
||||
*sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;
|
||||
|
||||
/* new full table depth is corresponding depth */
|
||||
*fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
|
||||
/* set up the new base offset */
|
||||
*base_suid = info->DiskOffsetOfLastFullTableInSUs;
|
||||
|
||||
/* convert users array address to an offset into the last fulltable */
|
||||
*SUID -= info->FullTableLimitSUID;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* map a stripe ID to a parity stripe ID.
|
||||
* See comment above RaidAddressToParityStripeID in layout.c.
|
||||
*/
|
||||
void rf_MapSIDToPSIDDeclustered(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID,
|
||||
RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru)
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info;
|
||||
|
||||
info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
|
||||
*psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable))
|
||||
* info->BlocksPerTable + (stripeID % info->BlocksPerTable);
|
||||
*which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU))
|
||||
/ info->BlocksPerTable;
|
||||
RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from MapSector and MapParity to retarget an access at the spare unit.
|
||||
* Modifies the "col" and "outSU" parameters only.
|
||||
*/
|
||||
void rf_remap_to_spare_space(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_DeclusteredConfigInfo_t *info,
|
||||
RF_RowCol_t row,
|
||||
RF_StripeNum_t FullTableID,
|
||||
RF_StripeNum_t TableID,
|
||||
RF_SectorNum_t BlockID,
|
||||
RF_StripeNum_t base_suid,
|
||||
RF_StripeNum_t SpareRegion,
|
||||
RF_RowCol_t *outCol,
|
||||
RF_StripeNum_t *outSU)
|
||||
{
|
||||
RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, which_ft;
|
||||
|
||||
/*
|
||||
* note that FullTableID and hence SpareRegion may have gotten
|
||||
* tweaked by rf_decluster_adjust_params. We detect this by
|
||||
* noticing that base_suid is not 0.
|
||||
*/
|
||||
if (base_suid == 0) {
|
||||
ftID = FullTableID;
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* There may be > 1.0 full tables in the last (i.e. partial)
|
||||
* spare region. find out which of these we're in.
|
||||
*/
|
||||
lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
|
||||
which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
|
||||
|
||||
/* compute the actual full table ID */
|
||||
ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
|
||||
SpareRegion = info->NumCompleteSRs;
|
||||
}
|
||||
TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion;
|
||||
|
||||
*outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
|
||||
RF_ASSERT( *outCol != -1);
|
||||
|
||||
spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
|
||||
info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
|
||||
(SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;
|
||||
*outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
|
||||
if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
|
||||
printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU);
|
||||
}
|
||||
}
|
||||
|
||||
int rf_InstallSpareTable(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RowCol_t frow,
|
||||
RF_RowCol_t fcol)
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_SparetWait_t *req;
|
||||
int retcode;
|
||||
|
||||
RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
|
||||
req->C = raidPtr->numCol;
|
||||
req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
|
||||
req->fcol = fcol;
|
||||
req->SUsPerPU = raidPtr->Layout.SUsPerPU;
|
||||
req->TablesPerSpareRegion = info->TablesPerSpareRegion;
|
||||
req->BlocksPerTable = info->BlocksPerTable;
|
||||
req->TableDepthInPUs = info->TableDepthInPUs;
|
||||
req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;
|
||||
|
||||
#ifndef KERNEL
|
||||
info->SpareTable = rf_ReadSpareTable(req, info->sparemap_fname);
|
||||
RF_Free(req, sizeof(*req));
|
||||
retcode = (info->SpareTable) ? 0 : 1;
|
||||
#else /* !KERNEL */
|
||||
retcode = rf_GetSpareTableFromDaemon(req);
|
||||
RF_ASSERT(!retcode); /* XXX -- fix this to recover gracefully -- XXX */
|
||||
#endif /* !KERNEL */
|
||||
|
||||
return(retcode);
|
||||
}
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* Invoked via ioctl to install a spare table in the kernel.
|
||||
*/
|
||||
int rf_SetSpareTable(raidPtr, data)
|
||||
RF_Raid_t *raidPtr;
|
||||
void *data;
|
||||
{
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_SpareTableEntry_t **ptrs;
|
||||
int i, retcode;
|
||||
|
||||
/* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */
|
||||
RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
|
||||
retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
|
||||
|
||||
if (retcode) return(retcode);
|
||||
|
||||
/* now allocate kernel space for the row pointers */
|
||||
RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
|
||||
|
||||
/* now allocate kernel space for each row in the table, and copy it in from user space */
|
||||
for (i=0; i<info->TablesPerSpareRegion; i++) {
|
||||
RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
|
||||
retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
|
||||
if (retcode) {
|
||||
info->SpareTable = NULL; /* blow off the memory we've allocated */
|
||||
return(retcode);
|
||||
}
|
||||
}
|
||||
|
||||
/* free up the temporary array we used */
|
||||
RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
|
||||
|
||||
return(0);
|
||||
}
|
||||
#endif /* KERNEL */
|
||||
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr)
|
||||
RF_Raid_t *raidPtr;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
|
||||
return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk );
|
||||
}
|
||||
|
||||
|
||||
void rf_FreeSpareTable(raidPtr)
|
||||
RF_Raid_t *raidPtr;
|
||||
{
|
||||
long i;
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_SpareTableEntry_t **table = info->SpareTable;
|
||||
|
||||
for (i=0; i<info->TablesPerSpareRegion; i++) {RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));}
|
||||
RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
|
||||
info->SpareTable = (RF_SpareTableEntry_t **) NULL;
|
||||
}
|
|
@ -0,0 +1,181 @@
|
|||
/* $NetBSD: rf_decluster.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
*
|
||||
* decluster.h -- header file for declustered layout code
|
||||
*
|
||||
* Adapted from raidSim version July 1994
|
||||
* Created 10-21-92 (MCH)
|
||||
*
|
||||
*--------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_decluster.h,v
|
||||
* Revision 1.20 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.19 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.18 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.17 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.16 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.15 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.14 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.13 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.12 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.11 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.10 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.9 1995/12/01 15:58:23 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.8 1995/11/17 18:57:02 wvcii
|
||||
* added prototyping to MapParity
|
||||
*
|
||||
* Revision 1.7 1995/07/02 15:08:31 holland
|
||||
* bug fixes related to getting distributed sparing numbers
|
||||
*
|
||||
* Revision 1.6 1995/06/23 13:41:18 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DECLUSTER_H_
|
||||
#define _RF__RF_DECLUSTER_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/*
|
||||
* These structures define the tables used to locate the spare unit
|
||||
* associated with a particular data or parity unit, and to perform
|
||||
* the associated inverse mapping.
|
||||
*/
|
||||
struct RF_SpareTableEntry_s {
|
||||
u_int spareDisk; /* disk to which this block is spared */
|
||||
u_int spareBlockOffsetInSUs; /* offset into spare table for that disk */
|
||||
};
|
||||
|
||||
#define RF_SPAREMAP_NAME_LEN 128
|
||||
|
||||
/* this is the layout-specific info structure for the declustered layout.
|
||||
*/
|
||||
struct RF_DeclusteredConfigInfo_s {
|
||||
RF_StripeCount_t groupSize; /* no. of stripe units per parity stripe */
|
||||
RF_RowCol_t **LayoutTable; /* the block design table */
|
||||
RF_RowCol_t **OffsetTable; /* the sector offset table */
|
||||
RF_RowCol_t **BlockTable; /* the block membership table */
|
||||
RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */
|
||||
RF_StripeCount_t SUsPerTable; /* stripe units per table */
|
||||
RF_StripeCount_t PUsPerBlock; /* parity units per block */
|
||||
RF_StripeCount_t SUsPerBlock; /* stripe units per block */
|
||||
RF_StripeCount_t BlocksPerTable; /* block design tuples per table */
|
||||
RF_StripeCount_t NumParityReps; /* tables per full table */
|
||||
RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */
|
||||
RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 fulltable */
|
||||
RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables start */
|
||||
RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last fulltable */
|
||||
RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial ft, if any */
|
||||
RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of partial ft, if any */
|
||||
u_int Lambda; /* the pair count in the block design */
|
||||
|
||||
/* these are used only in the distributed-sparing case */
|
||||
RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising 1 spare region */
|
||||
RF_StripeCount_t TablesPerSpareRegion; /* # of tables */
|
||||
RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare space/disk/region */
|
||||
RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */
|
||||
RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space after partial ft */
|
||||
RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs per disk */
|
||||
RF_StripeCount_t NumCompleteSRs;
|
||||
RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */
|
||||
char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find sparemap. not used in kernel */
|
||||
};
|
||||
|
||||
int rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
|
||||
void rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
void rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru);
|
||||
int rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
|
||||
void rf_FreeSpareTable(RF_Raid_t *raidPtr);
|
||||
|
||||
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr);
|
||||
int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr);
|
||||
|
||||
void rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t *SUID, RF_StripeCount_t *sus_per_fulltable,
|
||||
RF_StripeCount_t *fulltable_depth, RF_StripeNum_t *base_suid);
|
||||
void rf_remap_to_spare_space(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row, RF_StripeNum_t FullTableID,
|
||||
RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid,
|
||||
RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol, RF_StripeNum_t *outSU);
|
||||
int rf_SetSpareTable(RF_Raid_t *raidPtr, void *data);
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr);
|
||||
|
||||
#endif /* !_RF__RF_DECLUSTER_H_ */
|
|
@ -0,0 +1,588 @@
|
|||
/* $NetBSD: rf_declusterPQ.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Authors: Daniel Stodolsky, Mark Holland, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*--------------------------------------------------
|
||||
* rf_declusterPQ.c
|
||||
*
|
||||
* mapping code for declustered P & Q or declustered EvenOdd
|
||||
* much code borrowed from rf_decluster.c
|
||||
*
|
||||
*--------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* $Header: /cvsroot/src/sys/dev/raidframe/rf_declusterPQ.c,v 1.1 1998/11/13 04:20:28 oster Exp $
|
||||
*
|
||||
* Log: rf_declusterPQ.c,v
|
||||
* Revision 1.34 1996/08/21 19:47:14 jimz
|
||||
* fix bogus return values from config
|
||||
*
|
||||
* Revision 1.33 1996/08/21 15:09:16 jimz
|
||||
* cleanup debugging spoo
|
||||
*
|
||||
* Revision 1.32 1996/08/21 04:13:36 jimz
|
||||
* debug with EvenOdd
|
||||
*
|
||||
* Revision 1.31 1996/08/20 22:41:54 jimz
|
||||
* 2 parity disks, not 1
|
||||
*
|
||||
* Revision 1.30 1996/07/31 16:56:18 jimz
|
||||
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
|
||||
*
|
||||
* Revision 1.29 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.28 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.27 1996/06/11 08:45:12 jimz
|
||||
* improved error-checking on array configuration
|
||||
*
|
||||
* Revision 1.26 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.25 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.24 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.23 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.22 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.21 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.20 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.19 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.18 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.17 1996/05/17 00:52:56 jimz
|
||||
* RepIndex was not being initialized before the computation of
|
||||
* RepIndexQ in MapQDeclusteredPQ(). I copied the initialization
|
||||
* from MapParityDeclusteredPQ(). Hope that was right.
|
||||
*
|
||||
* Revision 1.16 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.15 1995/12/01 15:57:46 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.14 1995/11/17 19:00:13 wvcii
|
||||
* added prototyping to MapParity
|
||||
* created MapQ
|
||||
*
|
||||
* Revision 1.13 1995/10/05 22:20:48 jimz
|
||||
* free_1d_array() takes two args; provide them both
|
||||
*
|
||||
* Revision 1.12 1995/09/06 19:26:33 wvcii
|
||||
* offset cfgBuf by sparemap length (ConfigureDeclusteredPQ)
|
||||
*
|
||||
* Revision 1.11 95/06/23 13:41:11 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
* Revision 1.10 1995/05/02 22:46:53 holland
|
||||
* minor code cleanups.
|
||||
*
|
||||
* Revision 1.9 1995/03/15 20:45:23 holland
|
||||
* distr sparing changes.
|
||||
*
|
||||
* Revision 1.8 1995/03/01 20:25:48 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.7 1995/02/17 19:39:56 holland
|
||||
* added size param to all calls to Free().
|
||||
* this is ignored at user level, but necessary in the kernel.
|
||||
*
|
||||
* Revision 1.6 1995/02/10 17:34:10 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.5 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.4 1995/02/01 15:13:05 holland
|
||||
* moved #include of general.h out of raid.h and into each file
|
||||
*
|
||||
* Revision 1.3 1995/02/01 14:25:19 holland
|
||||
* began changes for kernelization:
|
||||
* changed all instances of mutex_t and cond_t to DECLARE macros
|
||||
* converted configuration code to use config structure
|
||||
*
|
||||
* Revision 1.2 1994/11/28 22:13:56 danner
|
||||
* corrected some mapping bugs.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_decluster.h"
|
||||
#include "rf_declusterPQ.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_general.h"
|
||||
|
||||
/* configuration code */
|
||||
|
||||
int rf_ConfigureDeclusteredPQ(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
int b, v, k, r, lambda; /* block design params */
|
||||
int i, j, l;
|
||||
int *first_avail_slot;
|
||||
int complete_FT_count, SUID;
|
||||
RF_DeclusteredConfigInfo_t *info;
|
||||
int numCompleteFullTablesPerDisk;
|
||||
int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, extraPUsPerDisk;
|
||||
int totSparePUsPerDisk;
|
||||
int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs;
|
||||
char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
|
||||
|
||||
cfgBuf += RF_SPAREMAP_NAME_LEN;
|
||||
|
||||
b = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
v = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
k = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
r = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
lambda = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
raidPtr->noRotate = *( (int *) cfgBuf); cfgBuf += sizeof(int);
|
||||
|
||||
if (k <= 2) {
|
||||
printf("RAIDFRAME: k=%d, minimum value 2\n", k);
|
||||
return(EINVAL);
|
||||
}
|
||||
|
||||
/* 1. create layout specific structure */
|
||||
RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
|
||||
if (info == NULL)
|
||||
return(ENOMEM);
|
||||
layoutPtr->layoutSpecificInfo = (void *) info;
|
||||
|
||||
/* the sparemaps are generated assuming that parity is rotated, so we issue
|
||||
* a warning if both distributed sparing and no-rotate are on at the same time
|
||||
*/
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
|
||||
RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n");
|
||||
}
|
||||
|
||||
if (raidPtr->numCol != v) {
|
||||
RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
|
||||
return(EINVAL);
|
||||
}
|
||||
|
||||
/* 3. set up the values used in devRaidMap */
|
||||
info->BlocksPerTable = b;
|
||||
info->NumParityReps = info->groupSize = k;
|
||||
info->PUsPerBlock = k-2; /* PQ */
|
||||
info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */
|
||||
info->SUsPerFullTable = k * info->SUsPerTable; /* rot k times */
|
||||
info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
|
||||
info->TableDepthInPUs = (b*k) / v;
|
||||
info->FullTableDepthInPUs = info->TableDepthInPUs * k; /* k repetitions */
|
||||
|
||||
/* used only in distributed sparing case */
|
||||
info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1); /* (v-1)/gcd fulltables */
|
||||
info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
|
||||
info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;
|
||||
|
||||
/* check to make sure the block design is sufficiently small */
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
|
||||
RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
|
||||
(int)info->FullTableDepthInPUs,
|
||||
(int)info->SpareSpaceDepthPerRegionInSUs,
|
||||
(int)layoutPtr->stripeUnitsPerDisk);
|
||||
return(EINVAL);
|
||||
}
|
||||
} else {
|
||||
if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
|
||||
RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
|
||||
(int)(info->TableDepthInPUs * layoutPtr->SUsPerPU),
|
||||
(int)layoutPtr->stripeUnitsPerDisk);
|
||||
return(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* compute the size of each disk, and the number of tables in the last fulltable (which
|
||||
* need not be complete)
|
||||
*/
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
|
||||
PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
|
||||
spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
|
||||
(info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
|
||||
info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
|
||||
|
||||
numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
|
||||
info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
|
||||
extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
|
||||
|
||||
/* assume conservatively that we need the full amount of spare space in one region in order
|
||||
* to provide spares for the partial spare region at the end of the array. We set "i" to
|
||||
* the number of tables in the partial spare region. This may actually include some fulltables.
|
||||
*/
|
||||
extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
|
||||
if (extraPUsPerDisk <= 0) i = 0;
|
||||
else i = extraPUsPerDisk/info->TableDepthInPUs;
|
||||
|
||||
complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
|
||||
info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
|
||||
info->ExtraTablesPerDisk = i % k;
|
||||
|
||||
/* note that in the last spare region, the spare space is complete even though data/parity space is not */
|
||||
totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
|
||||
info->TotSparePUsPerDisk = totSparePUsPerDisk;
|
||||
|
||||
layoutPtr->stripeUnitsPerDisk =
|
||||
((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs + /* data & parity space */
|
||||
info->ExtraTablesPerDisk * info->TableDepthInPUs +
|
||||
totSparePUsPerDisk /* spare space */
|
||||
) * layoutPtr->SUsPerPU;
|
||||
layoutPtr->dataStripeUnitsPerDisk =
|
||||
(complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
|
||||
* layoutPtr->SUsPerPU * (k-1) / k;
|
||||
|
||||
} else {
|
||||
/* non-dist spare case: force each disk to contain an integral number of tables */
|
||||
layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
|
||||
layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
|
||||
|
||||
/* compute the number of tables in the last fulltable, which need not be complete */
|
||||
complete_FT_count =
|
||||
((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
|
||||
|
||||
info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
|
||||
info->ExtraTablesPerDisk =
|
||||
((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
|
||||
}
|
||||
|
||||
raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
/* find the disk offset of the stripe unit where the last fulltable starts */
|
||||
numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
|
||||
diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
|
||||
diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
|
||||
info->DiskOffsetOfLastSpareSpaceChunkInSUs =
|
||||
diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
}
|
||||
info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
|
||||
info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
|
||||
|
||||
/* 4. create and initialize the lookup tables */
|
||||
info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
|
||||
if (info->LayoutTable == NULL)
|
||||
return(ENOMEM);
|
||||
info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
|
||||
if (info->OffsetTable == NULL)
|
||||
return(ENOMEM);
|
||||
info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
|
||||
if (info->BlockTable == NULL)
|
||||
return(ENOMEM);
|
||||
|
||||
first_avail_slot = (int *) rf_make_1d_array(v, NULL);
|
||||
if (first_avail_slot == NULL)
|
||||
return(ENOMEM);
|
||||
|
||||
for (i=0; i<b; i++)
|
||||
for (j=0; j<k; j++)
|
||||
info->LayoutTable[i][j] = *cfgBuf++;
|
||||
|
||||
/* initialize offset table */
|
||||
for (i=0; i<b; i++) for (j=0; j<k; j++) {
|
||||
info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ];
|
||||
first_avail_slot[ info->LayoutTable[i][j] ]++;
|
||||
}
|
||||
|
||||
/* initialize block table */
|
||||
for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
|
||||
for (i=0; i<b; i++) {
|
||||
for (j=0; j<k; j++) {
|
||||
info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
|
||||
[ info->LayoutTable[i][j] ] = SUID;
|
||||
}
|
||||
SUID++;
|
||||
}
|
||||
}
|
||||
|
||||
rf_free_1d_array(first_avail_slot, v);
|
||||
|
||||
/* 5. set up the remaining redundant-but-useful parameters */
|
||||
|
||||
raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
|
||||
info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-2);
|
||||
|
||||
/* strange evaluation order below to try and minimize overflow problems */
|
||||
|
||||
layoutPtr->dataSectorsPerStripe = (k-2) * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
|
||||
layoutPtr->numDataCol = k-2;
|
||||
layoutPtr->numParityCol = 2;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * Default number of floating reconstruction buffers for the declustered
 * P+Q layout: whichever is larger of three buffers per column and the
 * default reported by the plain declustered layout.
 */
int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr)
{
  int declusteredDefault = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr);

  return(RF_MAX(3 * raidPtr->numCol, declusteredDefault));
}
|
||||
|
||||
void rf_MapSectorDeclusteredPQ(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
|
||||
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
|
||||
RF_StripeNum_t BlockID, BlockOffset, RepIndex;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
|
||||
FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */
|
||||
*row = FullTableID % raidPtr->numRow;
|
||||
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
|
||||
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
|
||||
}
|
||||
FullTableOffset = SUID % sus_per_fulltable;
|
||||
TableID = FullTableOffset / info->SUsPerTable;
|
||||
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
|
||||
BlockID = TableOffset / info->PUsPerBlock;
|
||||
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
|
||||
BlockID %= info->BlocksPerTable;
|
||||
RF_ASSERT(BlockOffset < info->groupSize-2 );
|
||||
/*
|
||||
TableIDs go from 0 .. GroupSize-1 inclusive.
|
||||
PUsPerBlock is k-2.
|
||||
We want the tableIDs to rotate from the
|
||||
right, so use GroupSize
|
||||
*/
|
||||
RepIndex = info->groupSize - 1 - TableID;
|
||||
RF_ASSERT(RepIndex >= 0);
|
||||
if (!raidPtr->noRotate)
|
||||
{
|
||||
if (TableID==0)
|
||||
BlockOffset++; /* P on last drive, Q on first */
|
||||
else
|
||||
BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0); /* skip over PQ */
|
||||
RF_ASSERT(BlockOffset < info->groupSize);
|
||||
*col = info->LayoutTable[BlockID][BlockOffset];
|
||||
}
|
||||
|
||||
/* remap to distributed spare space if indicated */
|
||||
if (remap) {
|
||||
rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
|
||||
} else {
|
||||
|
||||
outSU = base_suid;
|
||||
outSU += FullTableID * fulltable_depth; /* offs to strt of FT */
|
||||
outSU += SpareSpace; /* skip rsvd spare space */
|
||||
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
|
||||
outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */
|
||||
}
|
||||
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within a PU */
|
||||
|
||||
/* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector */
|
||||
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
|
||||
}
|
||||
|
||||
|
||||
void rf_MapParityDeclusteredPQ(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
|
||||
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
|
||||
RF_StripeNum_t BlockID, BlockOffset, RepIndex;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace=0;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
|
||||
/* compute row & (possibly) spare space exactly as before */
|
||||
FullTableID = SUID / sus_per_fulltable;
|
||||
*row = FullTableID % raidPtr->numRow;
|
||||
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
|
||||
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
|
||||
}
|
||||
|
||||
/* compute BlockID and RepIndex exactly as before */
|
||||
FullTableOffset = SUID % sus_per_fulltable;
|
||||
TableID = FullTableOffset / info->SUsPerTable;
|
||||
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
|
||||
BlockID = TableOffset / info->PUsPerBlock;
|
||||
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
|
||||
BlockID %= info->BlocksPerTable;
|
||||
|
||||
/* the parity block is in the position indicated by RepIndex */
|
||||
RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID;
|
||||
*col = info->LayoutTable[BlockID][RepIndex];
|
||||
|
||||
if (remap)
|
||||
RF_PANIC();
|
||||
|
||||
/* compute sector as before, except use RepIndex instead of BlockOffset */
|
||||
outSU = base_suid;
|
||||
outSU += FullTableID * fulltable_depth;
|
||||
outSU += SpareSpace; /* skip rsvd spare space */
|
||||
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
|
||||
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
|
||||
|
||||
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
|
||||
}
|
||||
|
||||
void rf_MapQDeclusteredPQ(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
|
||||
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
|
||||
RF_StripeNum_t BlockID, BlockOffset, RepIndex, RepIndexQ;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0, outSU, SpareRegion, SpareSpace=0;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
|
||||
/* compute row & (possibly) spare space exactly as before */
|
||||
FullTableID = SUID / sus_per_fulltable;
|
||||
*row = FullTableID % raidPtr->numRow;
|
||||
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
|
||||
if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
|
||||
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
|
||||
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
|
||||
}
|
||||
|
||||
/* compute BlockID and RepIndex exactly as before */
|
||||
FullTableOffset = SUID % sus_per_fulltable;
|
||||
TableID = FullTableOffset / info->SUsPerTable;
|
||||
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
|
||||
BlockID = TableOffset / info->PUsPerBlock;
|
||||
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
|
||||
BlockID %= info->BlocksPerTable;
|
||||
|
||||
/* the q block is in the position indicated by RepIndex */
|
||||
RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->groupSize - 1 - TableID;
|
||||
RepIndexQ = ((RepIndex == (info->groupSize-1)) ? 0 : RepIndex+1);
|
||||
*col = info->LayoutTable[BlockID][RepIndexQ];
|
||||
|
||||
if (remap)
|
||||
RF_PANIC();
|
||||
|
||||
/* compute sector as before, except use RepIndex instead of BlockOffset */
|
||||
outSU = base_suid;
|
||||
outSU += FullTableID * fulltable_depth;
|
||||
outSU += SpareSpace; /* skip rsvd spare space */
|
||||
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
|
||||
|
||||
outSU += info->OffsetTable[BlockID][RepIndexQ] * layoutPtr->SUsPerPU;
|
||||
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
|
||||
}
|
||||
|
||||
/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
|
||||
* the caller must _never_ attempt to modify this array.
|
||||
*/
|
||||
void rf_IdentifyStripeDeclusteredPQ(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids,
|
||||
RF_RowCol_t *outRow)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
|
||||
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
|
||||
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
|
||||
RF_StripeNum_t base_suid = 0;
|
||||
RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
|
||||
RF_StripeNum_t stripeID, FullTableID;
|
||||
int tableOffset;
|
||||
|
||||
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
|
||||
FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */
|
||||
*outRow = FullTableID % raidPtr->numRow;
|
||||
stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID); /* find stripe offset into array */
|
||||
tableOffset = (stripeID % info->BlocksPerTable); /* find offset into block design table */
|
||||
*diskids = info->LayoutTable[tableOffset];
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
/* $NetBSD: rf_declusterPQ.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Daniel Stodolsky, Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_declusterPQ.h,v
|
||||
* Revision 1.13 1996/08/20 22:42:08 jimz
|
||||
* missing prototype of IdentifyStripeDeclusteredPQ added
|
||||
*
|
||||
* Revision 1.12 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.11 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.10 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.9 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.8 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.7 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.6 1995/12/01 15:59:20 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.5 1995/11/17 19:08:23 wvcii
|
||||
* added prototyping to MapParity
|
||||
*
|
||||
* Revision 1.4 1995/11/07 15:30:33 wvcii
|
||||
* changed PQDagSelect prototype
|
||||
* function no longer generates numHdrSucc, numTermAnt
|
||||
* removed ParityLoggingDagSelect prototype
|
||||
*
|
||||
* Revision 1.3 1995/06/23 13:40:57 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
* Revision 1.2 1995/05/02 22:46:53 holland
|
||||
* minor code cleanups.
|
||||
*
|
||||
* Revision 1.1 1994/11/19 20:26:57 danner
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DECLUSTERPQ_H_
|
||||
#define _RF__RF_DECLUSTERPQ_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
int rf_ConfigureDeclusteredPQ(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr);
|
||||
void rf_MapSectorDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapParityDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapQDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_IdentifyStripeDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
|
||||
#endif /* !_RF__RF_DECLUSTERPQ_H_ */
|
|
@ -0,0 +1,505 @@
|
|||
/* $NetBSD: rf_demo.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/**********************************************************************************
|
||||
*
|
||||
* rf_demo.c -- code for supporting demos. this is not actually part of the driver.
|
||||
*
|
||||
**********************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_demo.c,v
|
||||
* Revision 1.24 1996/06/17 14:38:33 jimz
|
||||
* properly #if out RF_DEMO code
|
||||
* fix bug in MakeConfig that was causing weird behavior
|
||||
* in configuration routines (config was not zeroed at start)
|
||||
* clean up genplot handling of stacks
|
||||
*
|
||||
* Revision 1.23 1996/06/17 03:23:09 jimz
|
||||
* explicitly do pthread stuff (for join)
|
||||
* NOTE: this should be changed!
|
||||
*
|
||||
* Revision 1.22 1996/06/14 23:15:38 jimz
|
||||
* attempt to deal with thread GC problem
|
||||
*
|
||||
* Revision 1.21 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.20 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.19 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.18 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.17 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.16 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.15 1996/05/20 16:14:08 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.14 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.13 1995/12/01 15:56:07 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
|
||||
#if RF_DEMO > 0
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_demo.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_options.h"
|
||||
|
||||
#ifdef SIMULATE
|
||||
#include "rf_diskevent.h"
|
||||
#endif /* SIMULATE */
|
||||
|
||||
static int doMax = 0; /* currently no way to set this */
|
||||
|
||||
/****************************************************************************************
|
||||
* fault-free demo code
|
||||
***************************************************************************************/
|
||||
|
||||
static int user_iops_meter = -1;
|
||||
static int disk_iops_meter = -1;
|
||||
static int max_user_meter = -1;
|
||||
static int max_disk_meter = -1;
|
||||
static int recon_pctg_meter = -1;
|
||||
static int avg_resp_time_meter = -1;
|
||||
static int recon_time_meter = -1;
|
||||
static int ff_avg_resp_time_meter = -1;
|
||||
static int deg_avg_resp_time_meter = -1;
|
||||
static int recon_avg_resp_time_meter = -1;
|
||||
static int user_ios_ff=0;
|
||||
static int user_ios_deg=0;
|
||||
static int user_ios_recon=0;
|
||||
static long user_resp_time_sum_ff = 0;
|
||||
static long user_resp_time_sum_deg = 0;
|
||||
static long user_resp_time_sum_recon = 0;
|
||||
|
||||
int rf_demo_op_mode = 0;
|
||||
|
||||
RF_DECLARE_STATIC_MUTEX(iops_mutex)
|
||||
static int user_ios_so_far, disk_ios_so_far, max_user, max_disk;
|
||||
static long user_resp_time_sum_ms;
|
||||
static int recon_pctg;
|
||||
static struct timeval iops_starttime;
|
||||
#ifndef SIMULATE
|
||||
static RF_Thread_t update_thread_desc;
|
||||
#endif /* !SIMULATE */
|
||||
static int meter_update_terminate;
|
||||
|
||||
static int meter_update_interval = 2; /* seconds between meter updates */
|
||||
static int iops_initialized = 0, recon_initialized = 0;
|
||||
|
||||
static char *demoMeterTags[] = {"FF", "Degr", "Recon"};
|
||||
|
||||
static int vpos=0;
|
||||
|
||||
static int rf_CreateMeter(char *title, char *geom, char *color);
|
||||
static void rf_UpdateMeter(int meterid, int value);
|
||||
static void rf_DestroyMeter(int meterid, int killproc);
|
||||
|
||||
void rf_startup_iops_demo(meter_vpos, C, G)
|
||||
int meter_vpos;
|
||||
int C;
|
||||
int G;
|
||||
{
|
||||
char buf[100], title[100];
|
||||
int rc;
|
||||
|
||||
vpos = meter_vpos;
|
||||
sprintf(buf, "%dx%d-0+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"%s %d/%d User IOs/sec",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
user_iops_meter = rf_CreateMeter(title, buf, "black");
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING,vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"%s %d/%d Disk IOs/sec",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
disk_iops_meter = rf_CreateMeter(title, buf, "red");
|
||||
if (doMax) {
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING),vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"%s %d/%d Avg User IOs/s",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
max_user_meter = rf_CreateMeter(title, buf, "black");
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 3*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"%s %d/%d Avg Disk IOs/s",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
max_disk_meter = rf_CreateMeter(title, buf, "red");
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 4*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
} else {
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
}
|
||||
sprintf(title,"%s %d/%d Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
avg_resp_time_meter = rf_CreateMeter(title, buf, "blue");
|
||||
rc = rf_mutex_init(&iops_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return;
|
||||
}
|
||||
user_ios_so_far = disk_ios_so_far = max_user = max_disk = 0;
|
||||
user_resp_time_sum_ms = 0;
|
||||
|
||||
meter_update_terminate = 0;
|
||||
#ifndef SIMULATE
|
||||
pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL);
|
||||
#endif /* !SIMULATE */
|
||||
gettimeofday(&iops_starttime, NULL);
|
||||
iops_initialized = 1;
|
||||
}
|
||||
|
||||
|
||||
void rf_update_user_stats(resptime)
|
||||
int resptime;
|
||||
{
|
||||
if (!iops_initialized && !recon_initialized) return;
|
||||
RF_LOCK_MUTEX(iops_mutex);
|
||||
user_ios_so_far++;
|
||||
user_resp_time_sum_ms += resptime;
|
||||
RF_UNLOCK_MUTEX(iops_mutex);
|
||||
}
|
||||
|
||||
void rf_update_disk_iops(val)
|
||||
int val;
|
||||
{
|
||||
if (!iops_initialized) return;
|
||||
RF_LOCK_MUTEX(iops_mutex);
|
||||
disk_ios_so_far += val;
|
||||
RF_UNLOCK_MUTEX(iops_mutex);
|
||||
}
|
||||
|
||||
void rf_meter_update_thread()
|
||||
{
|
||||
struct timeval now, diff;
|
||||
int iops, resptime;
|
||||
float secs;
|
||||
|
||||
#ifndef SIMULATE
|
||||
while (!meter_update_terminate) {
|
||||
gettimeofday(&now, NULL);
|
||||
RF_TIMEVAL_DIFF(&iops_starttime, &now, &diff);
|
||||
secs = ((float) diff.tv_sec) + ((float) diff.tv_usec)/1000000.0;
|
||||
#else /* !SIMULATE */
|
||||
secs = rf_cur_time;
|
||||
#endif /* !SIMULATE */
|
||||
if (user_iops_meter >= 0) {
|
||||
iops = (secs!=0.0) ? (int) (((float) user_ios_so_far) / secs) : 0;
|
||||
rf_UpdateMeter(user_iops_meter, iops);
|
||||
if (max_user_meter && iops > max_user) {max_user = iops; rf_UpdateMeter(max_user_meter, iops);}
|
||||
}
|
||||
|
||||
if (disk_iops_meter >= 0) {
|
||||
iops = (secs!=0.0) ? (int) (((float) disk_ios_so_far) / secs) : 0;
|
||||
rf_UpdateMeter(disk_iops_meter, iops);
|
||||
if (max_disk_meter && iops > max_disk) {max_disk = iops; rf_UpdateMeter(max_disk_meter, iops);}
|
||||
}
|
||||
|
||||
if (recon_pctg_meter >= 0) {
|
||||
rf_UpdateMeter(recon_pctg_meter, recon_pctg);
|
||||
}
|
||||
|
||||
switch (rf_demo_op_mode){
|
||||
case RF_DEMO_FAULT_FREE:
|
||||
resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0;
|
||||
if (resptime && (ff_avg_resp_time_meter >=0))
|
||||
rf_UpdateMeter(ff_avg_resp_time_meter, resptime);
|
||||
user_ios_ff += user_ios_so_far;
|
||||
user_resp_time_sum_ff += user_resp_time_sum_ms;
|
||||
break;
|
||||
case RF_DEMO_DEGRADED:
|
||||
resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0;
|
||||
if (resptime &&(deg_avg_resp_time_meter >=0))
|
||||
rf_UpdateMeter(deg_avg_resp_time_meter, resptime);
|
||||
user_ios_deg += user_ios_so_far;
|
||||
user_resp_time_sum_deg += user_resp_time_sum_ms;
|
||||
case RF_DEMO_RECON:
|
||||
resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0;
|
||||
if (resptime && (recon_avg_resp_time_meter >= 0))
|
||||
rf_UpdateMeter(recon_avg_resp_time_meter, resptime);
|
||||
user_ios_recon += user_ios_so_far;
|
||||
user_resp_time_sum_recon += user_resp_time_sum_ms;
|
||||
break;
|
||||
default: printf("WARNING: demo meter update thread: Invalid op mode! \n");
|
||||
}
|
||||
user_ios_so_far = 0;
|
||||
user_resp_time_sum_ms = 0;
|
||||
#ifndef SIMULATE
|
||||
RF_DELAY_THREAD(1,0);
|
||||
}
|
||||
#endif /* !SIMULATE */
|
||||
}
|
||||
|
||||
void rf_finish_iops_demo()
|
||||
{
|
||||
long status;
|
||||
|
||||
if (!iops_initialized) return;
|
||||
iops_initialized = 0; /* make sure any subsequent update calls don't do anything */
|
||||
meter_update_terminate = 1;
|
||||
#ifndef SIMULATE
|
||||
pthread_join(update_thread_desc, (pthread_addr_t)&status);
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
rf_DestroyMeter(user_iops_meter, (doMax) ? 1 : 0);
|
||||
rf_DestroyMeter(disk_iops_meter, (doMax) ? 1 : 0);
|
||||
rf_DestroyMeter(max_user_meter, 0);
|
||||
rf_DestroyMeter(max_disk_meter, 0);
|
||||
rf_DestroyMeter(avg_resp_time_meter, 0);
|
||||
rf_mutex_destroy(&iops_mutex);
|
||||
}
|
||||
|
||||
void rf_demo_update_mode(arg_mode)
|
||||
int arg_mode;
|
||||
{
|
||||
int hpos;
|
||||
char buf[100], title[100];
|
||||
|
||||
switch (rf_demo_op_mode = arg_mode) {
|
||||
case RF_DEMO_DEGRADED:
|
||||
|
||||
/* freeze fault-free response time meter; create degraded mode meter */
|
||||
hpos=rf_demoMeterHpos+2;
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"Degraded Mode Average Response Time (ms)",demoMeterTags[rf_demoMeterTag]);
|
||||
deg_avg_resp_time_meter = rf_CreateMeter(title, buf, "purple");
|
||||
rf_UpdateMeter(ff_avg_resp_time_meter, (user_ios_ff == 0)? 0: user_resp_time_sum_ff/user_ios_ff);
|
||||
break;
|
||||
|
||||
case RF_DEMO_RECON:
|
||||
|
||||
/* freeze degraded mode response time meter; create recon meters */
|
||||
hpos = rf_demoMeterHpos+1;
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"Reconstruction Average Response Time (ms)",demoMeterTags[rf_demoMeterTag]);
|
||||
recon_avg_resp_time_meter = rf_CreateMeter(title, buf, "darkgreen");
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos) * (RF_DEMO_METER_WIDTH + RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"Percent Complete / Recon Time");
|
||||
recon_pctg_meter = rf_CreateMeter(title,buf,"red");
|
||||
rf_UpdateMeter(deg_avg_resp_time_meter, (user_ios_deg == 0)? 0: user_resp_time_sum_deg/user_ios_deg);
|
||||
break;
|
||||
|
||||
default: /*do nothing -- finish_recon_demo will update rest of meters */;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************************
|
||||
* reconstruction demo code
|
||||
***************************************************************************************/
|
||||
|
||||
|
||||
void rf_startup_recon_demo(meter_vpos, C, G, init)
|
||||
int meter_vpos;
|
||||
int C;
|
||||
int G;
|
||||
int init;
|
||||
{
|
||||
char buf[100], title[100];
|
||||
int rc;
|
||||
|
||||
vpos = meter_vpos;
|
||||
if (init) {
|
||||
/* init demo -- display ff resp time meter */
|
||||
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos+3) * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
|
||||
sprintf(title,"%s %d/%d Fault-Free Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G);
|
||||
ff_avg_resp_time_meter = rf_CreateMeter(title, buf, "blue");
|
||||
}
|
||||
rc = rf_mutex_init(&iops_mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
}
|
||||
|
||||
meter_update_terminate = 0;
|
||||
#ifndef SIMULATE
|
||||
pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL);
|
||||
#endif /* !SIMULATE */
|
||||
gettimeofday(&iops_starttime, NULL);
|
||||
recon_initialized = 1;
|
||||
}
|
||||
|
||||
void rf_update_recon_meter(val)
|
||||
int val;
|
||||
{
|
||||
recon_pctg = val;
|
||||
}
|
||||
|
||||
|
||||
void rf_finish_recon_demo(etime)
|
||||
struct timeval *etime;
|
||||
{
|
||||
long status;
|
||||
int hpos;
|
||||
|
||||
hpos = rf_demoMeterHpos;
|
||||
|
||||
recon_initialized = 0; /* make sure any subsequent
|
||||
update calls don't do anything */
|
||||
recon_pctg = etime->tv_sec; /* display recon time on meter */
|
||||
|
||||
rf_UpdateMeter(recon_avg_resp_time_meter, (user_ios_recon == 0)? 0: user_resp_time_sum_recon/user_ios_recon);
|
||||
|
||||
rf_UpdateMeter(recon_pctg_meter, etime->tv_sec);
|
||||
|
||||
meter_update_terminate = 1;
|
||||
|
||||
#ifndef SIMULATE
|
||||
pthread_join(update_thread_desc, (pthread_addr_t)&status); /* join the meter update thread */
|
||||
#endif /* !SIMULATE */
|
||||
rf_DestroyMeter(recon_pctg_meter, 0);
|
||||
rf_DestroyMeter(ff_avg_resp_time_meter, 0);
|
||||
rf_DestroyMeter(deg_avg_resp_time_meter, 0);
|
||||
rf_DestroyMeter(recon_avg_resp_time_meter, 0);
|
||||
rf_mutex_destroy(&iops_mutex);
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************************
|
||||
* meter manipulation code
|
||||
***************************************************************************************/
|
||||
|
||||
#define MAXMETERS 50

/*
 * One entry per meter, indexed by meter handle:
 *   sd   - socket descriptor connected to the meter, or -1 when unused
 *   pid  - process ID returned by fork() for the meter process
 *   name - path handed to the meter process and used as the socket address
 */
static struct meter_info {
  int  sd;
  int  pid;
  char name[100];
} minfo[MAXMETERS];

/* handle that the next created meter will receive */
static int meter_num = 0;

/*
 * Mark every entry of the meter table as unused (sd == -1).
 * Always returns 0.
 */
int rf_ConfigureMeters()
{
  struct meter_info *slot;

  for (slot = minfo; slot < minfo + MAXMETERS; slot++)
    slot->sd = -1;
  return(0);
}
|
||||
|
||||
/* forks a dmeter process to create a 4-digit meter window
|
||||
* "title" appears in the title bar of the meter window
|
||||
* returns an integer handle (really a socket descriptor) by which
|
||||
* the new meter can be accessed.
|
||||
*/
|
||||
static int rf_CreateMeter(title, geom, color)
|
||||
char *title;
|
||||
char *geom;
|
||||
char *color;
|
||||
{
|
||||
char geombuf[100], *clr;
|
||||
int sd, pid, i, status;
|
||||
struct sockaddr sa;
|
||||
|
||||
if (!geom) sprintf(geombuf,"120x40-0+%d", 50*meter_num); else sprintf(geombuf, "%s", geom);
|
||||
clr = (color) ? color : "black";
|
||||
sprintf(minfo[meter_num].name,"/tmp/xm_%d",meter_num);
|
||||
unlink(minfo[meter_num].name);
|
||||
|
||||
if ( !(pid = fork()) ) {
|
||||
execlp("dmeter","dmeter","-noscroll","-t",title,"-geometry",geombuf,"-sa",minfo[meter_num].name,"-fg",clr,NULL);
|
||||
perror("rf_CreateMeter: exec failed");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
sd = socket(AF_UNIX,SOCK_STREAM,0);
|
||||
sa.sa_family = AF_UNIX;
|
||||
strcpy(sa.sa_data, minfo[meter_num].name);
|
||||
for (i=0; i<50; i++) { /* this give us 25 seconds to get the meter running */
|
||||
if ( (status = connect(sd,&sa,sizeof(sa))) != -1) break;
|
||||
#ifdef SIMULATE
|
||||
sleep (1);
|
||||
#else /* SIMULATE */
|
||||
RF_DELAY_THREAD(0, 500);
|
||||
#endif /* SIMULATE */
|
||||
}
|
||||
if (status == -1) {
|
||||
perror("Unable to connect to meter");
|
||||
exit(1);
|
||||
}
|
||||
minfo[meter_num].sd = sd;
|
||||
minfo[meter_num].pid = pid;
|
||||
return(meter_num++);
|
||||
}
|
||||
|
||||
/* causes the meter to display the given value */
|
||||
void rf_UpdateMeter(meterid, value)
|
||||
int meterid;
|
||||
int value;
|
||||
{
|
||||
if (write(minfo[meterid].sd, &value, sizeof(int)) < sizeof(int)) {
|
||||
fprintf(stderr,"Unable to write to meter %d\n",meterid);
|
||||
}
|
||||
}
|
||||
|
||||
void rf_DestroyMeter(meterid, killproc)
|
||||
int meterid;
|
||||
int killproc;
|
||||
{
|
||||
close(minfo[meterid].sd);
|
||||
if (killproc) kill(minfo[meterid].pid, SIGTERM);
|
||||
minfo[meterid].sd = -1;
|
||||
}
|
||||
|
||||
int rf_ShutdownAllMeters()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<MAXMETERS; i++)
|
||||
if (minfo[i].sd >= 0)
|
||||
rf_DestroyMeter(i, 0);
|
||||
return(0);
|
||||
}
|
||||
|
||||
#endif /* RF_DEMO > 0 */
|
|
@ -0,0 +1,82 @@
|
|||
/* $NetBSD: rf_demo.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_demo.h
|
||||
* some constants for demo'ing software
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_demo.h,v
|
||||
* Revision 1.8 1996/06/14 23:15:38 jimz
|
||||
* attempt to deal with thread GC problem
|
||||
*
|
||||
* Revision 1.7 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.6 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.5 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.4 1995/12/01 15:58:53 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DEMO_H_
|
||||
#define _RF__RF_DEMO_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
#define RF_DEMO_METER_WIDTH 300 /* how wide each meter is */
|
||||
#define RF_DEMO_METER_HEIGHT 150 /* how tall */
|
||||
#define RF_DEMO_METER_SPACING 15 /* how much space between horizontally */
|
||||
#define RF_DEMO_METER_VSPACE 20 /* how much space between vertically */
|
||||
#define RF_DEMO_FAULT_FREE 0
|
||||
#define RF_DEMO_DEGRADED 1
|
||||
#define RF_DEMO_RECON 2
|
||||
|
||||
void rf_startup_iops_demo(int meter_vpos, int C, int G);
|
||||
void rf_update_user_stats(int resptime);
|
||||
void rf_update_disk_iops(int val);
|
||||
void rf_meter_update_thread(void);
|
||||
void rf_finish_iops_demo(void);
|
||||
void rf_demo_update_mode(int arg_mode);
|
||||
void rf_startup_recon_demo(int meter_vpos, int C, int G, int init);
|
||||
void rf_update_recon_meter(int val);
|
||||
void rf_finish_recon_demo(struct timeval *etime);
|
||||
|
||||
extern int rf_demo_op_mode;
|
||||
|
||||
#endif /* !_RF__RF_DEMO_H_ */
|
|
@ -0,0 +1,180 @@
|
|||
/* $NetBSD: rf_desc.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_desc.h,v
|
||||
* Revision 1.29 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.28 1996/06/07 22:49:22 jimz
|
||||
* fix up raidPtr typing
|
||||
*
|
||||
* Revision 1.27 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.26 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.25 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.24 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.23 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.22 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.21 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.20 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.19 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.18 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.17 1995/12/01 15:58:43 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.16 1995/11/19 16:31:30 wvcii
|
||||
* descriptors now contain an array of dag lists as opposed to a dag header
|
||||
*
|
||||
* Revision 1.15 1995/11/07 16:24:17 wvcii
|
||||
* updated def of _AccessState
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DESC_H_
|
||||
#define _RF__RF_DESC_H_
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_dag.h"
|
||||
|
||||
struct RF_RaidReconDesc_s {
|
||||
RF_Raid_t *raidPtr; /* raid device descriptor */
|
||||
RF_RowCol_t row; /* row of failed disk */
|
||||
RF_RowCol_t col; /* col of failed disk */
|
||||
int state; /* how far along the reconstruction operation has gotten */
|
||||
RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon (not used in dist sparing) */
|
||||
int numDisksDone; /* the number of surviving disks that have completed their work */
|
||||
RF_RowCol_t srow; /* row ID of the spare disk (not used in dist sparing) */
|
||||
RF_RowCol_t scol; /* col ID of the spare disk (not used in dist sparing) */
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* Prevent recon from hogging CPU
|
||||
*/
|
||||
RF_Etimer_t recon_exec_timer;
|
||||
RF_uint64 reconExecTimerRunning;
|
||||
RF_uint64 reconExecTicks;
|
||||
RF_uint64 maxReconExecTicks;
|
||||
#endif /* KERNEL */
|
||||
|
||||
#if RF_RECON_STATS > 0
|
||||
RF_uint64 hsStallCount; /* head sep stall count */
|
||||
RF_uint64 numReconExecDelays;
|
||||
RF_uint64 numReconEventWaits;
|
||||
#endif /* RF_RECON_STATS > 0 */
|
||||
RF_RaidReconDesc_t *next;
|
||||
};
|
||||
|
||||
struct RF_RaidAccessDesc_s {
|
||||
RF_Raid_t *raidPtr; /* raid device descriptor */
|
||||
RF_IoType_t type; /* read or write */
|
||||
RF_RaidAddr_t raidAddress; /* starting address in raid address space */
|
||||
RF_SectorCount_t numBlocks; /* number of blocks (sectors) to transfer */
|
||||
RF_StripeCount_t numStripes; /* number of stripes involved in access */
|
||||
caddr_t bufPtr; /* pointer to data buffer */
|
||||
|
||||
#if !defined(KERNEL) && !defined(SIMULATE)
|
||||
caddr_t obufPtr; /* real pointer to data buffer */
|
||||
#endif /* !KERNEL && !SIMULATE */
|
||||
|
||||
RF_RaidAccessFlags_t flags; /* flags controlling operation */
|
||||
int state; /* index into states telling how far along the RAID operation has gotten */
|
||||
RF_AccessState_t *states; /* array of states to be run */
|
||||
int status; /* pass/fail status of the last operation */
|
||||
RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */
|
||||
RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */
|
||||
void *bp; /* buf pointer for this RAID acc. ignored outside the kernel */
|
||||
RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to the caller after I/O completion */
|
||||
RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be returned to the caller after I/O completion */
|
||||
RF_AccTraceEntry_t tracerec; /* perf monitoring information for a user access (not for dag stats) */
|
||||
void (*callbackFunc)(RF_CBParam_t); /* callback function for this I/O */
|
||||
void *callbackArg; /* arg to give to callback func */
|
||||
int tid; /* debug only, user-level only: thread id of thr that did this access */
|
||||
|
||||
RF_AllocListElem_t *cleanupList; /* memory to be freed at the end of the access*/
|
||||
|
||||
RF_RaidAccessDesc_t *next;
|
||||
RF_RaidAccessDesc_t *head;
|
||||
|
||||
int numPending;
|
||||
|
||||
RF_DECLARE_MUTEX(mutex) /* these are used to implement blocking I/O */
|
||||
RF_DECLARE_COND(cond)
|
||||
|
||||
#ifdef SIMULATE
|
||||
RF_Owner_t owner;
|
||||
int async_flag;
|
||||
#endif /* SIMULATE */
|
||||
|
||||
RF_Etimer_t timer; /* used for timing this access */
|
||||
};
|
||||
|
||||
#endif /* !_RF__RF_DESC_H_ */
|
|
@ -0,0 +1,290 @@
|
|||
/* $NetBSD: rf_diskevent.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Rachad Youssef
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_diskevent.c - support for disk device, by managing a heap of future events
|
||||
* adapted from original code by David Kotz, Song Bac Toh (1994)
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_diskevent.c,v
|
||||
* Revision 1.18 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.17 1996/07/27 16:05:19 jimz
|
||||
* return ENOMEM if DDEventInit fails its call to InitHeap
|
||||
*
|
||||
* Revision 1.16 1996/06/10 12:06:24 jimz
|
||||
* fix spelling errors
|
||||
*
|
||||
* Revision 1.15 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.14 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.13 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.12 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.11 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.10 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.9 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1995/12/01 15:57:56 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_heap.h"
|
||||
#include "rf_diskevent.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_diskthreads.h"
|
||||
#include "rf_states.h"
|
||||
#include "rf_shutdown.h"
|
||||
|
||||
/* trace printing can be turned on/off in the Makefile */
|
||||
|
||||
RF_TICS_t rf_cur_time;
|
||||
static RF_Owner_t cur_owner;
|
||||
static RF_Heap_t heap;
|
||||
|
||||
static void rf_DDEventShutdown(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
rf_FreeHeap(heap);
|
||||
}
|
||||
|
||||
/* ======================================================================== */
|
||||
/* DDEventInit
|
||||
*
|
||||
* Initialize the event heap.
|
||||
*/
|
||||
int rf_DDEventInit(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
heap = rf_InitHeap(RF_HEAP_MAX); /* initialize the heap */
|
||||
if (heap == NULL)
|
||||
return(ENOMEM);
|
||||
rc = rf_ShutdownCreate(listp, rf_DDEventShutdown, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("RAIDFRAME: failed creating shutdown event file %s line %d rc=%d\n",
|
||||
__FILE__, __LINE__, rc);
|
||||
rf_FreeHeap(heap);
|
||||
return(rc);
|
||||
}
|
||||
rf_cur_time=(RF_TICS_t)0;
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* DDEventRequest
|
||||
*
|
||||
* Put an event request into the event heap.
|
||||
*/
|
||||
void rf_DDEventRequest(
|
||||
RF_TICS_t eventTime,
|
||||
int (*CompleteFunc)(),
|
||||
void *argument,
|
||||
RF_Owner_t owner,
|
||||
RF_RowCol_t row,
|
||||
RF_RowCol_t col,
|
||||
RF_Raid_t *raidPtr,
|
||||
void *diskid)
|
||||
{
|
||||
RF_HeapData_t *hpdat;
|
||||
|
||||
RF_Malloc(hpdat,sizeof(RF_HeapData_t),(RF_HeapData_t *) );
|
||||
if (hpdat == NULL) {
|
||||
fprintf(stderr, "DDEventRequest: malloc failed\n");
|
||||
RF_PANIC();
|
||||
}
|
||||
|
||||
hpdat->eventTime = eventTime;
|
||||
hpdat->CompleteFunc = CompleteFunc;
|
||||
hpdat->argument = argument;
|
||||
hpdat->owner = owner;
|
||||
hpdat->row = row;
|
||||
hpdat->col = col;
|
||||
hpdat->raidPtr = raidPtr;
|
||||
hpdat->diskid = diskid;
|
||||
rf_AddHeap(heap, hpdat, (hpdat->eventTime));
|
||||
}
|
||||
|
||||
void rf_DAGEventRequest(
|
||||
RF_TICS_t eventTime,
|
||||
RF_Owner_t owner,
|
||||
RF_RowCol_t row,
|
||||
RF_RowCol_t col,
|
||||
RF_RaidAccessDesc_t *desc,
|
||||
RF_Raid_t *raidPtr)
|
||||
{
|
||||
RF_HeapData_t *hpdat;
|
||||
|
||||
RF_Malloc(hpdat,sizeof(RF_HeapData_t),(RF_HeapData_t *) );
|
||||
if (hpdat == NULL) {
|
||||
fprintf(stderr, "DDEventRequest: malloc failed\n");
|
||||
RF_PANIC();
|
||||
}
|
||||
|
||||
hpdat->eventTime = eventTime;
|
||||
hpdat->CompleteFunc = NULL;
|
||||
hpdat->argument = NULL;
|
||||
hpdat->owner = owner;
|
||||
hpdat->row = row;
|
||||
hpdat->col = col;
|
||||
hpdat->desc=desc;
|
||||
hpdat->raidPtr = raidPtr;
|
||||
|
||||
rf_AddHeap(heap, hpdat, (hpdat->eventTime));
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* @SUBTITLE "Print out the request queue" */
|
||||
/* There is only 1 request queue so no argument is needed for this
|
||||
function */
|
||||
void rf_DDPrintRequests()
|
||||
{
|
||||
RF_HeapData_t *Hpdat;
|
||||
RF_HeapKey_t Hpkey;
|
||||
RF_Heap_t tempHp;
|
||||
|
||||
printf("Events on heap:\n");
|
||||
|
||||
tempHp = rf_InitHeap(RF_HEAP_MAX);
|
||||
while (rf_RemHeap(heap, &Hpdat, &Hpkey) != RF_HEAP_NONE)
|
||||
{
|
||||
printf ("at %5g HpKey there is: something for owner %d at disk %d %d\n",Hpkey,
|
||||
Hpdat->owner,Hpdat->row,Hpdat->col);
|
||||
rf_AddHeap(tempHp, Hpdat, Hpdat->eventTime);
|
||||
}
|
||||
|
||||
printf("END heap:\n");
|
||||
rf_FreeHeap(heap); /* free the empty old heap */
|
||||
|
||||
heap = tempHp; /* restore the recycled heap */
|
||||
}
|
||||
/* ------------------------------------------------------------------------ */
|
||||
|
||||
int rf_ProcessEvent()
|
||||
{
|
||||
RF_HeapData_t *Hpdat;
|
||||
RF_HeapKey_t Hpkey;
|
||||
int retcode;
|
||||
|
||||
retcode = rf_RemHeap(heap, &Hpdat, &Hpkey);
|
||||
|
||||
if (retcode==RF_HEAP_FOUND) {
|
||||
if (rf_eventDebug) {
|
||||
rf_DDPrintRequests();
|
||||
printf ("Now processing: at %5g something for owner %d at disk %d %d\n",
|
||||
Hpkey, Hpdat->owner, Hpdat->row, Hpdat->col);
|
||||
}
|
||||
rf_cur_time=Hpkey;
|
||||
|
||||
rf_SetCurrentOwner(Hpdat->owner);
|
||||
|
||||
if (Hpdat->row>=0) {/* ongoing dag event */
|
||||
rf_SetDiskIdle (Hpdat->raidPtr, Hpdat->row, Hpdat->col);
|
||||
if (Hpdat->diskid != NULL) {
|
||||
rf_simulator_complete_io(Hpdat->diskid);
|
||||
}
|
||||
retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0);
|
||||
if (retcode==RF_HEAP_FOUND)
|
||||
(((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbFunc)(((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbArg);
|
||||
RF_Free(Hpdat,sizeof(RF_HeapData_t));
|
||||
return(retcode);
|
||||
}
|
||||
else {
|
||||
/* this is a dag event or reconstruction event */
|
||||
if (Hpdat->row==RF_DD_DAGEVENT_ROW){ /* dag event */
|
||||
rf_ContinueRaidAccess(Hpdat->desc);
|
||||
retcode = RF_FALSE;
|
||||
RF_Free(Hpdat,sizeof(RF_HeapData_t));
|
||||
return (RF_FALSE);
|
||||
}
|
||||
else {
|
||||
/* recon event */
|
||||
retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0);
|
||||
retcode = RF_FALSE;
|
||||
RF_Free(Hpdat,sizeof(RF_HeapData_t));
|
||||
return (RF_FALSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rf_eventDebug)
|
||||
printf("HEAP is empty\n");
|
||||
return(RF_DD_NOTHING_THERE);
|
||||
}
|
||||
|
||||
RF_Owner_t rf_GetCurrentOwner()
|
||||
{
|
||||
return(cur_owner);
|
||||
}
|
||||
|
||||
/* Record owner as the current owner; read back with rf_GetCurrentOwner(). */
void rf_SetCurrentOwner(RF_Owner_t owner)
{
  cur_owner = owner;
}
|
||||
|
||||
RF_TICS_t rf_CurTime()
|
||||
{
|
||||
return(rf_cur_time);
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/* $NetBSD: rf_diskevent.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
|
||||
/*
|
||||
* rf_diskevent.h
|
||||
* Adapted from original code by David Kotz (1994)
|
||||
*
|
||||
* The disk-device module is event driven. This module keeps the event
|
||||
* request mechanism, which is based on proteus SimRequests,
|
||||
* abstracted away from the bulk of the disk device code.
|
||||
*
|
||||
* Functions
|
||||
* DDEventInit
|
||||
* DDEventRequest
|
||||
* DDEventPrint
|
||||
* DDEventCancel
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_diskevent.h,v
|
||||
* Revision 1.10 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.9 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.8 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.7 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.6 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.5 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.4 1995/12/01 15:57:16 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DISKEVENT_H_
|
||||
#define _RF__RF_DISKEVENT_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_heap.h"
|
||||
#ifndef __NetBSD__
|
||||
#include "time.h"
|
||||
#endif
|
||||
|
||||
#define RF_DD_NOTHING_THERE (-1)
|
||||
#define RF_DD_DAGEVENT_ROW (-3)
|
||||
#define RF_DD_DAGEVENT_COL RF_DD_DAGEVENT_ROW
|
||||
|
||||
extern RF_TICS_t rf_cur_time;
|
||||
|
||||
/*
|
||||
* list of disk-device request types,
|
||||
* initialized in diskdevice.c,
|
||||
* used in diskevent.c
|
||||
*/
|
||||
typedef void (*RF_DDhandler)(int disk, RF_TICS_t eventTime);
|
||||
struct RF_dd_handlers_s {
|
||||
RF_DDhandler handler; /* function implementing this event type */
|
||||
char name[20]; /* name of that event type */
|
||||
};
|
||||
extern struct RF_dd_handlers_s rf_DDhandlers[];
|
||||
|
||||
int rf_DDEventInit(RF_ShutdownList_t **listp);
|
||||
void rf_DDEventRequest(RF_TICS_t eventTime, int (*CompleteFunc)(),
|
||||
void *argument, RF_Owner_t owner, RF_RowCol_t row, RF_RowCol_t col,
|
||||
RF_Raid_t *raidPtr, void *diskid);
|
||||
void rf_DAGEventRequest(RF_TICS_t eventTime, RF_Owner_t owner,
|
||||
RF_RowCol_t row, RF_RowCol_t col, RF_RaidAccessDesc_t *desc,
|
||||
RF_Raid_t *raidPtr);
|
||||
void rf_DDPrintRequests(void);
|
||||
int rf_ProcessEvent(void);
|
||||
RF_Owner_t rf_GetCurrentOwner(void);
|
||||
void rf_SetCurrentOwner(RF_Owner_t owner);
|
||||
RF_TICS_t rf_CurTime(void);
|
||||
|
||||
#endif /* !_RF__RF_DISKEVENT_H_ */
|
|
@ -0,0 +1,924 @@
|
|||
/* $NetBSD: rf_diskqueue.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/****************************************************************************************
|
||||
*
|
||||
* rf_diskqueue.c -- higher-level disk queue code
|
||||
*
|
||||
* the routines here are a generic wrapper around the actual queueing
|
||||
* routines. The code here implements thread scheduling, synchronization,
|
||||
* and locking ops (see below) on top of the lower-level queueing code.
|
||||
*
|
||||
* to support atomic RMW, we implement "locking operations". When a locking op
|
||||
* is dispatched to the lower levels of the driver, the queue is locked, and no further
|
||||
* I/Os are dispatched until the queue receives & completes a corresponding "unlocking
|
||||
* operation". This code relies on the higher layers to guarantee that a locking
|
||||
* op will always be eventually followed by an unlocking op. The model is that
|
||||
* the higher layers are structured so locking and unlocking ops occur in pairs, i.e.
|
||||
* an unlocking op cannot be generated until after a locking op reports completion.
|
||||
* There is no good way to check to see that an unlocking op "corresponds" to the
|
||||
* op that currently has the queue locked, so we make no such attempt. Since by
|
||||
* definition there can be only one locking op outstanding on a disk, this should
|
||||
* not be a problem.
|
||||
*
|
||||
* In the kernel, we allow multiple I/Os to be concurrently dispatched to the disk
|
||||
* driver. In order to support locking ops in this environment, when we decide to
|
||||
* do a locking op, we stop dispatching new I/Os and wait until all dispatched I/Os
|
||||
* have completed before dispatching the locking op.
|
||||
*
|
||||
* Unfortunately, the code is different in the 3 different operating states
|
||||
* (user level, kernel, simulator). In the kernel, I/O is non-blocking, and
|
||||
* we have no disk threads to dispatch for us. Therefore, we have to dispatch
|
||||
* new I/Os to the scsi driver at the time of enqueue, and also at the time
|
||||
* of completion. At user level, I/O is blocking, and so only the disk threads
|
||||
* may dispatch I/Os. Thus at user level, all we can do at enqueue time is
|
||||
* enqueue and wake up the disk thread to do the dispatch.
|
||||
*
|
||||
***************************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
*
|
||||
* Log: rf_diskqueue.c,v
|
||||
* Revision 1.50 1996/08/07 21:08:38 jimz
|
||||
* b_proc -> kb_proc
|
||||
*
|
||||
* Revision 1.49 1996/07/05 20:36:14 jimz
|
||||
* make rf_ConfigureDiskQueueSystem return 0
|
||||
*
|
||||
* Revision 1.48 1996/06/18 20:53:11 jimz
|
||||
* fix up disk queueing (remove configure routine,
|
||||
* add shutdown list arg to create routines)
|
||||
*
|
||||
* Revision 1.47 1996/06/14 14:16:36 jimz
|
||||
* fix handling of bogus queue type
|
||||
*
|
||||
* Revision 1.46 1996/06/13 20:41:44 jimz
|
||||
* add scan, cscan, random queueing
|
||||
*
|
||||
* Revision 1.45 1996/06/11 01:27:50 jimz
|
||||
* Fixed bug where diskthread shutdown would crash or hang. This
|
||||
* turned out to be two distinct bugs:
|
||||
* (1) [crash] The thread shutdown code wasn't properly waiting for
|
||||
* all the diskthreads to complete. This caused diskthreads that were
|
||||
* exiting+cleaning up to unlock a destroyed mutex.
|
||||
* (2) [hang] TerminateDiskQueues wasn't locking, and DiskIODequeue
|
||||
* only checked for termination _after_ a wakeup if the queues were
|
||||
* empty. This was a race where the termination wakeup could be lost
|
||||
* by the dequeueing thread, and the system would hang waiting for the
|
||||
* thread to exit, while the thread waited for an I/O or a signal to
|
||||
* check the termination flag.
|
||||
*
|
||||
* Revision 1.44 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.43 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.42 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.41 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.40 1996/06/06 17:28:04 jimz
|
||||
* track sector number of last I/O dequeued
|
||||
*
|
||||
* Revision 1.39 1996/06/06 01:14:13 jimz
|
||||
* fix crashing bug when tracerec is NULL (ie, from copyback)
|
||||
* initialize req->queue
|
||||
*
|
||||
* Revision 1.38 1996/06/05 19:38:32 jimz
|
||||
* fixed up disk queueing types config
|
||||
* added sstf disk queueing
|
||||
* fixed exit bug on diskthreads (ref-ing bad mem)
|
||||
*
|
||||
* Revision 1.37 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.36 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.35 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.34 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.33 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.32 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.31 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.30 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.29 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.28 1996/05/20 16:14:29 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.27 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.26 1996/05/16 19:21:49 wvcii
|
||||
* fixed typo in init_dqd
|
||||
*
|
||||
* Revision 1.25 1996/05/16 16:02:51 jimz
|
||||
* switch to RF_FREELIST stuff for DiskQueueData
|
||||
*
|
||||
* Revision 1.24 1996/05/10 16:24:14 jimz
|
||||
* new cvscan function names
|
||||
*
|
||||
* Revision 1.23 1996/05/01 16:27:54 jimz
|
||||
* don't use ccmn bp management
|
||||
*
|
||||
* Revision 1.22 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.21 1995/12/01 15:59:59 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.20 1995/11/07 16:27:20 wvcii
|
||||
* added Peek() function to diskqueuesw
|
||||
* non-locking accesses are never blocked (assume clients enforce proper
|
||||
* respect for lock acquisition)
|
||||
*
|
||||
* Revision 1.19 1995/10/05 18:56:52 jimz
|
||||
* fix req handling in IOComplete
|
||||
*
|
||||
* Revision 1.18 1995/10/04 20:13:50 wvcii
|
||||
* added asserts to monitor numOutstanding queueLength
|
||||
*
|
||||
* Revision 1.17 1995/10/04 07:43:52 wvcii
|
||||
* queue->numOutstanding now valid for user & sim
|
||||
* added queue->queueLength
|
||||
* user tested & verified, sim untested
|
||||
*
|
||||
* Revision 1.16 1995/09/12 00:21:19 wvcii
|
||||
* added support for tracing disk queue time
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_diskqueue.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_freelist.h"
|
||||
#include "rf_debugprint.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_cvscan.h"
|
||||
#include "rf_sstf.h"
|
||||
#include "rf_fifo.h"
|
||||
|
||||
#ifdef SIMULATE
|
||||
#include "rf_diskevent.h"
|
||||
#endif /* SIMULATE */
|
||||
|
||||
#if !defined(__NetBSD__)
|
||||
extern struct buf *ubc_bufget();
|
||||
#endif
|
||||
|
||||
static int init_dqd(RF_DiskQueueData_t *);
|
||||
static void clean_dqd(RF_DiskQueueData_t *);
|
||||
static void rf_ShutdownDiskQueueSystem(void *);
|
||||
/* From rf_kintf.c */
|
||||
int rf_DispatchKernelIO(RF_DiskQueue_t *,RF_DiskQueueData_t *);
|
||||
|
||||
|
||||
/*
 * Queue-debug tracing.  DprintfN(fmt, a1, ..., aN) forwards the format
 * string and N arguments to rf_debug_printf() when rf_queueDebug is
 * non-zero; unused argument slots are passed as NULL.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement: with a bare `if` in the expansion, an `else` written after a
 * Dprintf call would silently bind to the macro's hidden `if`.  Arguments
 * are parenthesized so that the cast applies to the whole argument
 * expression rather than to its first operand.
 */
#define Dprintf1(s,a) \
  do { \
    if (rf_queueDebug) \
      rf_debug_printf((s), \
        (void *)((unsigned long)(a)), \
        NULL,NULL,NULL,NULL,NULL,NULL,NULL); \
  } while (0)

#define Dprintf2(s,a,b) \
  do { \
    if (rf_queueDebug) \
      rf_debug_printf((s), \
        (void *)((unsigned long)(a)), \
        (void *)((unsigned long)(b)), \
        NULL,NULL,NULL,NULL,NULL,NULL); \
  } while (0)

#define Dprintf3(s,a,b,c) \
  do { \
    if (rf_queueDebug) \
      rf_debug_printf((s), \
        (void *)((unsigned long)(a)), \
        (void *)((unsigned long)(b)), \
        (void *)((unsigned long)(c)), \
        NULL,NULL,NULL,NULL,NULL); \
  } while (0)

#define Dprintf4(s,a,b,c,d) \
  do { \
    if (rf_queueDebug) \
      rf_debug_printf((s), \
        (void *)((unsigned long)(a)), \
        (void *)((unsigned long)(b)), \
        (void *)((unsigned long)(c)), \
        (void *)((unsigned long)(d)), \
        NULL,NULL,NULL,NULL); \
  } while (0)

#define Dprintf5(s,a,b,c,d,e) \
  do { \
    if (rf_queueDebug) \
      rf_debug_printf((s), \
        (void *)((unsigned long)(a)), \
        (void *)((unsigned long)(b)), \
        (void *)((unsigned long)(c)), \
        (void *)((unsigned long)(d)), \
        (void *)((unsigned long)(e)), \
        NULL,NULL,NULL); \
  } while (0)
|
||||
|
||||
#if !defined(KERNEL) && !defined(SIMULATE)

/*
 * User-level build: the queue's condition variable is used to hand work
 * to a waiter, and numWaiting counts the threads blocked in
 * WAIT_DISK_QUEUE.  The _wh_ argument is not used by either macro.
 */

/* queue must be locked before invoking this */
#define SIGNAL_DISK_QUEUE(_q_,_wh_)                      \
{                                                        \
  if ( (_q_)->numWaiting > 0) {                          \
    (_q_)->numWaiting--;                                 \
    RF_SIGNAL_COND( ((_q_)->cond) );                     \
  }                                                      \
}

/* queue must be locked before invoking this */
#define WAIT_DISK_QUEUE(_q_,_wh_)                        \
{                                                        \
  (_q_)->numWaiting++;                                   \
  RF_WAIT_COND( ((_q_)->cond), ((_q_)->mutex) );         \
}

#else /* !defined(KERNEL) && !defined(SIMULATE) */

/* kernel and simulator builds: both macros expand to nothing */
#define SIGNAL_DISK_QUEUE(_q_,_wh_)
#define WAIT_DISK_QUEUE(_q_,_wh_)

#endif /* !defined(KERNEL) && !defined(SIMULATE) */
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* the disk queue switch defines all the functions used in the different queueing
|
||||
* disciplines
|
||||
* queue ID, init routine, enqueue routine, dequeue routine
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
static RF_DiskQueueSW_t diskqueuesw[] = {
|
||||
{"fifo", /* FIFO */
|
||||
rf_FifoCreate,
|
||||
rf_FifoEnqueue,
|
||||
rf_FifoDequeue,
|
||||
rf_FifoPeek,
|
||||
rf_FifoPromote},
|
||||
|
||||
{"cvscan", /* cvscan */
|
||||
rf_CvscanCreate,
|
||||
rf_CvscanEnqueue,
|
||||
rf_CvscanDequeue,
|
||||
rf_CvscanPeek,
|
||||
rf_CvscanPromote },
|
||||
|
||||
{"sstf", /* shortest seek time first */
|
||||
rf_SstfCreate,
|
||||
rf_SstfEnqueue,
|
||||
rf_SstfDequeue,
|
||||
rf_SstfPeek,
|
||||
rf_SstfPromote},
|
||||
|
||||
{"scan", /* SCAN (two-way elevator) */
|
||||
rf_ScanCreate,
|
||||
rf_SstfEnqueue,
|
||||
rf_ScanDequeue,
|
||||
rf_ScanPeek,
|
||||
rf_SstfPromote},
|
||||
|
||||
{"cscan", /* CSCAN (one-way elevator) */
|
||||
rf_CscanCreate,
|
||||
rf_SstfEnqueue,
|
||||
rf_CscanDequeue,
|
||||
rf_CscanPeek,
|
||||
rf_SstfPromote},
|
||||
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
/* to make a point to Chris :-> */
|
||||
{"random", /* random */
|
||||
rf_FifoCreate,
|
||||
rf_FifoEnqueue,
|
||||
rf_RandomDequeue,
|
||||
rf_RandomPeek,
|
||||
rf_FifoPromote},
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
};
|
||||
#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
|
||||
|
||||
/* freelist of RF_DiskQueueData_t, created by rf_ConfigureDiskQueueSystem() */
static RF_FreeList_t *rf_dqd_freelist;

/* sizing parameters for the dqd freelist */
#define RF_MAX_FREE_DQD 256
#define RF_DQD_INC       16
#define RF_DQD_INITIAL   64
|
||||
|
||||
#ifdef __NetBSD__
|
||||
#ifdef _KERNEL
|
||||
#include <sys/buf.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static int init_dqd(dqd)
|
||||
RF_DiskQueueData_t *dqd;
|
||||
{
|
||||
#ifdef KERNEL
|
||||
#ifdef __NetBSD__
|
||||
/* XXX not sure if the following malloc is appropriate... probably not quite... */
|
||||
dqd->bp = (struct buf *) malloc( sizeof(struct buf), M_DEVBUF, M_NOWAIT);
|
||||
memset(dqd->bp,0,sizeof(struct buf)); /* if you don't do it, nobody else will.. */
|
||||
/* XXX */
|
||||
/* printf("NEED TO IMPLEMENT THIS BETTER!\n"); */
|
||||
#else
|
||||
dqd->bp = ubc_bufget();
|
||||
#endif
|
||||
if (dqd->bp == NULL) {
|
||||
return(ENOMEM);
|
||||
}
|
||||
#endif /* KERNEL */
|
||||
return(0);
|
||||
}
|
||||
|
||||
static void clean_dqd(dqd)
|
||||
RF_DiskQueueData_t *dqd;
|
||||
{
|
||||
#ifdef KERNEL
|
||||
#ifdef __NetBSD__
|
||||
/* printf("NEED TO IMPLEMENT THIS BETTER(2)!\n"); */
|
||||
/* XXX ? */
|
||||
free( dqd->bp, M_DEVBUF );
|
||||
#else
|
||||
ubc_buffree(dqd->bp);
|
||||
#endif
|
||||
|
||||
#endif /* KERNEL */
|
||||
}
|
||||
|
||||
/* configures a single disk queue */
|
||||
static int config_disk_queue(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_DiskQueue_t *diskqueue,
|
||||
RF_RowCol_t r, /* row & col -- debug only. BZZT not any more... */
|
||||
RF_RowCol_t c,
|
||||
RF_DiskQueueSW_t *p,
|
||||
RF_SectorCount_t sectPerDisk,
|
||||
dev_t dev,
|
||||
int maxOutstanding,
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_AllocListElem_t *clList)
|
||||
{
|
||||
int rc;
|
||||
|
||||
diskqueue->row = r;
|
||||
diskqueue->col = c;
|
||||
diskqueue->qPtr = p;
|
||||
diskqueue->qHdr = (p->Create)(sectPerDisk, clList, listp);
|
||||
diskqueue->dev = dev;
|
||||
diskqueue->numOutstanding = 0;
|
||||
diskqueue->queueLength = 0;
|
||||
diskqueue->maxOutstanding = maxOutstanding;
|
||||
diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
|
||||
diskqueue->nextLockingOp = NULL;
|
||||
diskqueue->unlockingOp = NULL;
|
||||
diskqueue->numWaiting=0;
|
||||
diskqueue->flags = 0;
|
||||
diskqueue->raidPtr = raidPtr;
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c];
|
||||
#endif
|
||||
rc = rf_create_managed_mutex(listp, &diskqueue->mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
rc = rf_create_managed_cond(listp, &diskqueue->cond);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
static void rf_ShutdownDiskQueueSystem(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist,next,(RF_DiskQueueData_t *),clean_dqd);
|
||||
}
|
||||
|
||||
int rf_ConfigureDiskQueueSystem(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD,
|
||||
RF_DQD_INC, sizeof(RF_DiskQueueData_t));
|
||||
if (rf_dqd_freelist == NULL)
|
||||
return(ENOMEM);
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
|
||||
__FILE__, __LINE__, rc);
|
||||
rf_ShutdownDiskQueueSystem(NULL);
|
||||
return(rc);
|
||||
}
|
||||
RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL,next,
|
||||
(RF_DiskQueueData_t *),init_dqd);
|
||||
return(0);
|
||||
}
|
||||
|
||||
#ifndef KERNEL
|
||||
/* this is called prior to shutdown to wakeup everyone waiting on a disk queue
|
||||
* and tell them to exit
|
||||
*/
|
||||
void rf_TerminateDiskQueues(raidPtr)
|
||||
RF_Raid_t *raidPtr;
|
||||
{
|
||||
RF_RowCol_t r, c;
|
||||
|
||||
raidPtr->terminate_disk_queues = 1;
|
||||
for (r=0; r<raidPtr->numRow; r++) {
|
||||
for (c=0; c<raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0); c++) {
|
||||
RF_LOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues");
|
||||
RF_BROADCAST_COND(raidPtr->Queues[r][c].cond);
|
||||
RF_UNLOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* !KERNEL */
|
||||
|
||||
int rf_ConfigureDiskQueues(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_DiskQueue_t **diskQueues, *spareQueues;
|
||||
RF_DiskQueueSW_t *p;
|
||||
RF_RowCol_t r, c;
|
||||
int rc, i;
|
||||
|
||||
raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;
|
||||
|
||||
for(p=NULL,i=0;i<NUM_DISK_QUEUE_TYPES;i++) {
|
||||
if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
|
||||
p = &diskqueuesw[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n",cfgPtr->diskQueueType, diskqueuesw[0].queueType);
|
||||
p = &diskqueuesw[0];
|
||||
}
|
||||
|
||||
RF_CallocAndAdd(diskQueues, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList);
|
||||
if (diskQueues == NULL) {
|
||||
return(ENOMEM);
|
||||
}
|
||||
raidPtr->Queues = diskQueues;
|
||||
for (r=0; r<raidPtr->numRow; r++) {
|
||||
RF_CallocAndAdd(diskQueues[r], raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), raidPtr->cleanupList);
|
||||
if (diskQueues[r] == NULL)
|
||||
return(ENOMEM);
|
||||
for (c=0; c<raidPtr->numCol; c++) {
|
||||
rc = config_disk_queue(raidPtr, &diskQueues[r][c], r, c, p,
|
||||
raidPtr->sectorsPerDisk, raidPtr->Disks[r][c].dev,
|
||||
cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList);
|
||||
if (rc)
|
||||
return(rc);
|
||||
}
|
||||
}
|
||||
|
||||
spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
|
||||
for (r=0; r<raidPtr->numSpare; r++) {
|
||||
rc = config_disk_queue(raidPtr, &spareQueues[r],
|
||||
0, raidPtr->numCol+r, p,
|
||||
raidPtr->sectorsPerDisk,
|
||||
raidPtr->Disks[0][raidPtr->numCol+r].dev,
|
||||
cfgPtr->maxOutstandingDiskReqs, listp,
|
||||
raidPtr->cleanupList);
|
||||
if (rc)
|
||||
return(rc);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* Enqueue a disk I/O
|
||||
*
|
||||
* Unfortunately, we have to do things differently in the different
|
||||
* environments (simulator, user-level, kernel).
|
||||
* At user level, all I/O is blocking, so we have 1 or more threads/disk
|
||||
* and the thread that enqueues is different from the thread that dequeues.
|
||||
* In the kernel, I/O is non-blocking and so we'd like to have multiple
|
||||
* I/Os outstanding on the physical disks when possible.
|
||||
*
|
||||
* when any request arrives at a queue, we have two choices:
|
||||
* dispatch it to the lower levels
|
||||
* queue it up
|
||||
*
|
||||
* kernel rules for when to do what:
|
||||
* locking request: queue empty => dispatch and lock queue,
|
||||
* else queue it
|
||||
* unlocking req : always dispatch it
|
||||
* normal req : queue empty => dispatch it & set priority
|
||||
* queue not full & priority is ok => dispatch it
|
||||
* else queue it
|
||||
*
|
||||
* user-level rules:
|
||||
* always enqueue. In the special case of an unlocking op, enqueue
|
||||
* in a special way that will cause the unlocking op to be the next
|
||||
* thing dequeued.
|
||||
*
|
||||
* simulator rules:
|
||||
* Do the same as at user level, with the sleeps and wakeups suppressed.
|
||||
*/
|
||||
void rf_DiskIOEnqueue(queue, req, pri)
|
||||
RF_DiskQueue_t *queue;
|
||||
RF_DiskQueueData_t *req;
|
||||
int pri;
|
||||
{
|
||||
int tid;
|
||||
|
||||
RF_ETIMER_START(req->qtime);
|
||||
rf_get_threadid(tid);
|
||||
RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
|
||||
req->priority = pri;
|
||||
|
||||
if (rf_queueDebug && (req->numSector == 0)) {
|
||||
printf("Warning: Enqueueing zero-sector access\n");
|
||||
}
|
||||
|
||||
#ifdef KERNEL
|
||||
/*
|
||||
* kernel
|
||||
*/
|
||||
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
|
||||
/* locking request */
|
||||
if (RF_LOCKING_REQ(req)) {
|
||||
if (RF_QUEUE_EMPTY(queue)) {
|
||||
Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n",pri,queue->row, queue->col);
|
||||
RF_LOCK_QUEUE(queue);
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
} else {
|
||||
queue->queueLength++; /* increment count of number of requests waiting in this queue */
|
||||
Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n",pri,queue->row, queue->col);
|
||||
req->queue = (void *)queue;
|
||||
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
|
||||
}
|
||||
}
|
||||
/* unlocking request */
|
||||
else if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock when this I/O completes */
|
||||
Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n",pri,queue->row, queue->col);
|
||||
RF_ASSERT(RF_QUEUE_LOCKED(queue));
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
}
|
||||
/* normal request */
|
||||
else if (RF_OK_TO_DISPATCH(queue, req)) {
|
||||
Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n",pri,queue->row, queue->col);
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
} else {
|
||||
queue->queueLength++; /* increment count of number of requests waiting in this queue */
|
||||
Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n",pri,queue->row, queue->col);
|
||||
req->queue = (void *)queue;
|
||||
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
|
||||
}
|
||||
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
|
||||
|
||||
#else /* KERNEL */
|
||||
/*
|
||||
* user-level
|
||||
*/
|
||||
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
|
||||
queue->queueLength++; /* increment count of number of requests waiting in this queue */
|
||||
/* unlocking request */
|
||||
if (RF_UNLOCKING_REQ(req)) {
|
||||
Dprintf4("[%d] enqueueing pri %d unlocking op & signalling r %d c %d\n", tid, pri, queue->row, queue->col);
|
||||
RF_ASSERT(RF_QUEUE_LOCKED(queue) && queue->unlockingOp == NULL);
|
||||
queue->unlockingOp = req;
|
||||
}
|
||||
/* locking and normal requests */
|
||||
else {
|
||||
req->queue = (void *)queue;
|
||||
Dprintf5("[%d] enqueueing pri %d %s op & signalling r %d c %d\n", tid, pri,
|
||||
(RF_LOCKING_REQ(req)) ? "locking" : "regular",queue->row,queue->col);
|
||||
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
|
||||
}
|
||||
SIGNAL_DISK_QUEUE( queue, "DiskIOEnqueue");
|
||||
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
|
||||
#endif /* KERNEL */
|
||||
}
|
||||
|
||||
#if !defined(KERNEL) && !defined(SIMULATE)
|
||||
/* user-level only: tell all threads to wake up & recheck the queue */
|
||||
void rf_BroadcastOnQueue(queue)
|
||||
RF_DiskQueue_t *queue;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (queue->maxOutstanding > 1) for (i=0; i<queue->maxOutstanding; i++) {
|
||||
SIGNAL_DISK_QUEUE(queue, "BroadcastOnQueue" );
|
||||
}
|
||||
}
|
||||
#endif /* !KERNEL && !SIMULATE */
|
||||
|
||||
#ifndef KERNEL /* not used in kernel */
|
||||
|
||||
RF_DiskQueueData_t *rf_DiskIODequeue(queue)
|
||||
RF_DiskQueue_t *queue;
|
||||
{
|
||||
RF_DiskQueueData_t *p, *headItem;
|
||||
int tid;
|
||||
|
||||
rf_get_threadid(tid);
|
||||
RF_LOCK_QUEUE_MUTEX( queue, "DiskIODequeue" );
|
||||
for (p=NULL; !p; ) {
|
||||
if (queue->unlockingOp) {
|
||||
/* unlocking request */
|
||||
RF_ASSERT(RF_QUEUE_LOCKED(queue));
|
||||
p = queue->unlockingOp;
|
||||
queue->unlockingOp = NULL;
|
||||
Dprintf4("[%d] dequeueing pri %d unlocking op r %d c %d\n", tid, p->priority, queue->row,queue->col);
|
||||
}
|
||||
else {
|
||||
headItem = (queue->qPtr->Peek)(queue->qHdr);
|
||||
if (headItem) {
|
||||
if (RF_LOCKING_REQ(headItem)) {
|
||||
/* locking request */
|
||||
if (!RF_QUEUE_LOCKED(queue)) {
|
||||
/* queue isn't locked, so dequeue the request & lock the queue */
|
||||
p = (queue->qPtr->Dequeue)( queue->qHdr );
|
||||
if (p)
|
||||
Dprintf4("[%d] dequeueing pri %d locking op r %d c %d\n", tid, p->priority, queue->row, queue->col);
|
||||
else
|
||||
Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
|
||||
}
|
||||
else {
|
||||
/* queue already locked, no dequeue occurs */
|
||||
Dprintf3("[%d] no dequeue -- queue is locked r %d c %d\n", tid, queue->row, queue->col);
|
||||
p = NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* normal request, always dequeue and assume caller already has lock (if needed) */
|
||||
p = (queue->qPtr->Dequeue)( queue->qHdr );
|
||||
if (p)
|
||||
Dprintf4("[%d] dequeueing pri %d regular op r %d c %d\n", tid, p->priority, queue->row, queue->col);
|
||||
else
|
||||
Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
|
||||
}
|
||||
}
|
||||
else {
|
||||
Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
|
||||
}
|
||||
}
|
||||
|
||||
if (queue->raidPtr->terminate_disk_queues) {
|
||||
p = NULL;
|
||||
break;
|
||||
}
|
||||
#ifdef SIMULATE
|
||||
break; /* in simulator, return NULL on empty queue instead of blocking */
|
||||
#else /* SIMULATE */
|
||||
if (!p) {
|
||||
Dprintf3("[%d] nothing to dequeue: waiting r %d c %d\n", tid, queue->row, queue->col);
|
||||
WAIT_DISK_QUEUE( queue, "DiskIODequeue" );
|
||||
}
|
||||
#endif /* SIMULATE */
|
||||
}
|
||||
|
||||
if (p) {
|
||||
queue->queueLength--; /* decrement count of number of requests waiting in this queue */
|
||||
RF_ASSERT(queue->queueLength >= 0);
|
||||
queue->numOutstanding++;
|
||||
queue->last_deq_sector = p->sectorOffset;
|
||||
/* record the amount of time this request spent in the disk queue */
|
||||
RF_ETIMER_STOP(p->qtime);
|
||||
RF_ETIMER_EVAL(p->qtime);
|
||||
if (p->tracerec)
|
||||
p->tracerec->diskqueue_us += RF_ETIMER_VAL_US(p->qtime);
|
||||
}
|
||||
|
||||
if (p && RF_LOCKING_REQ(p)) {
|
||||
RF_ASSERT(!RF_QUEUE_LOCKED(queue));
|
||||
Dprintf3("[%d] locking queue r %d c %d\n",tid,queue->row,queue->col);
|
||||
RF_LOCK_QUEUE(queue);
|
||||
}
|
||||
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIODequeue" );
|
||||
|
||||
return(p);
|
||||
}
|
||||
|
||||
#else /* !KERNEL */
|
||||
|
||||
/* get the next set of I/Os started, kernel version only */
|
||||
void rf_DiskIOComplete(queue, req, status)
|
||||
RF_DiskQueue_t *queue;
|
||||
RF_DiskQueueData_t *req;
|
||||
int status;
|
||||
{
|
||||
int done=0;
|
||||
|
||||
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOComplete" );
|
||||
|
||||
/* unlock the queue:
|
||||
(1) after an unlocking req completes
|
||||
(2) after a locking req fails
|
||||
*/
|
||||
if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) {
|
||||
Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col);
|
||||
RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL));
|
||||
RF_UNLOCK_QUEUE(queue);
|
||||
}
|
||||
|
||||
queue->numOutstanding--;
|
||||
RF_ASSERT(queue->numOutstanding >= 0);
|
||||
|
||||
/* dispatch requests to the disk until we find one that we can't. */
|
||||
/* no reason to continue once we've filled up the queue */
|
||||
/* no reason to even start if the queue is locked */
|
||||
|
||||
while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) {
|
||||
if (queue->nextLockingOp) {
|
||||
req = queue->nextLockingOp; queue->nextLockingOp = NULL;
|
||||
Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n",req->priority,queue->row, queue->col);
|
||||
} else {
|
||||
req = (queue->qPtr->Dequeue)( queue->qHdr );
|
||||
Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n",req->priority,queue->row, queue->col);
|
||||
}
|
||||
if (req) {
|
||||
queue->queueLength--; /* decrement count of number of requests waiting in this queue */
|
||||
RF_ASSERT(queue->queueLength >= 0);
|
||||
}
|
||||
if (!req) done=1;
|
||||
else if (RF_LOCKING_REQ(req)) {
|
||||
if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */
|
||||
Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n",req->priority,queue->row, queue->col);
|
||||
RF_LOCK_QUEUE(queue);
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
done = 1;
|
||||
} else { /* put it aside to wait for the queue to drain */
|
||||
Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n",req->priority,queue->row, queue->col);
|
||||
RF_ASSERT(queue->nextLockingOp == NULL);
|
||||
queue->nextLockingOp = req;
|
||||
done = 1;
|
||||
}
|
||||
} else if (RF_UNLOCKING_REQ(req)) { /* should not happen: unlocking ops should not get queued */
|
||||
RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for the future */
|
||||
Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n",req->priority,queue->row, queue->col);
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
done = 1;
|
||||
} else if (RF_OK_TO_DISPATCH(queue, req)) {
|
||||
Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n",req->priority,queue->row, queue->col);
|
||||
rf_DispatchKernelIO(queue, req);
|
||||
} else { /* we can't dispatch it, so just re-enqueue it. */
|
||||
/* potential trouble here if disk queues batch reqs */
|
||||
Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n",req->priority,queue->row, queue->col);
|
||||
queue->queueLength++;
|
||||
(queue->qPtr->Enqueue)(queue->qHdr, req, req->priority);
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
|
||||
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOComplete" );
|
||||
}
|
||||
#endif /* !KERNEL */
|
||||
|
||||
/* promotes accesses tagged with the given parityStripeID from low priority
|
||||
* to normal priority. This promotion is optional, meaning that a queue
|
||||
* need not implement it. If there is no promotion routine associated with
|
||||
* a queue, this routine does nothing and returns -1.
|
||||
*/
|
||||
int rf_DiskIOPromote(queue, parityStripeID, which_ru)
|
||||
RF_DiskQueue_t *queue;
|
||||
RF_StripeNum_t parityStripeID;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (!queue->qPtr->Promote)
|
||||
return(-1);
|
||||
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOPromote" );
|
||||
retval = (queue->qPtr->Promote)( queue->qHdr, parityStripeID, which_ru );
|
||||
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOPromote" );
|
||||
return(retval);
|
||||
}
|
||||
|
||||
RF_DiskQueueData_t *rf_CreateDiskQueueData(
|
||||
RF_IoType_t typ,
|
||||
RF_SectorNum_t ssect,
|
||||
RF_SectorCount_t nsect,
|
||||
caddr_t buf,
|
||||
RF_StripeNum_t parityStripeID,
|
||||
RF_ReconUnitNum_t which_ru,
|
||||
int (*wakeF)(void *,int),
|
||||
void *arg,
|
||||
RF_DiskQueueData_t *next,
|
||||
RF_AccTraceEntry_t *tracerec,
|
||||
void *raidPtr,
|
||||
RF_DiskQueueDataFlags_t flags,
|
||||
void *kb_proc)
|
||||
{
|
||||
RF_DiskQueueData_t *p;
|
||||
|
||||
RF_FREELIST_GET_INIT(rf_dqd_freelist,p,next,(RF_DiskQueueData_t *),init_dqd);
|
||||
|
||||
p->sectorOffset = ssect + rf_protectedSectors;
|
||||
p->numSector = nsect;
|
||||
p->type = typ;
|
||||
p->buf = buf;
|
||||
p->parityStripeID= parityStripeID;
|
||||
p->which_ru = which_ru;
|
||||
p->CompleteFunc = wakeF;
|
||||
p->argument = arg;
|
||||
p->next = next;
|
||||
p->tracerec = tracerec;
|
||||
p->priority = RF_IO_NORMAL_PRIORITY;
|
||||
p->AuxFunc = NULL;
|
||||
p->buf2 = NULL;
|
||||
#ifdef SIMULATE
|
||||
p->owner = rf_GetCurrentOwner();
|
||||
#endif /* SIMULATE */
|
||||
p->raidPtr = raidPtr;
|
||||
p->flags = flags;
|
||||
#ifdef KERNEL
|
||||
p->b_proc = kb_proc;
|
||||
#endif /* KERNEL */
|
||||
return(p);
|
||||
}
|
||||
|
||||
RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(
|
||||
RF_IoType_t typ,
|
||||
RF_SectorNum_t ssect,
|
||||
RF_SectorCount_t nsect,
|
||||
caddr_t buf,
|
||||
RF_StripeNum_t parityStripeID,
|
||||
RF_ReconUnitNum_t which_ru,
|
||||
int (*wakeF)(void *,int),
|
||||
void *arg,
|
||||
RF_DiskQueueData_t *next,
|
||||
RF_AccTraceEntry_t *tracerec,
|
||||
int priority,
|
||||
int (*AuxFunc)(void *,...),
|
||||
caddr_t buf2,
|
||||
void *raidPtr,
|
||||
RF_DiskQueueDataFlags_t flags,
|
||||
void *kb_proc)
|
||||
{
|
||||
RF_DiskQueueData_t *p;
|
||||
|
||||
RF_FREELIST_GET_INIT(rf_dqd_freelist,p,next,(RF_DiskQueueData_t *),init_dqd);
|
||||
|
||||
p->sectorOffset = ssect + rf_protectedSectors;
|
||||
p->numSector = nsect;
|
||||
p->type = typ;
|
||||
p->buf = buf;
|
||||
p->parityStripeID= parityStripeID;
|
||||
p->which_ru = which_ru;
|
||||
p->CompleteFunc = wakeF;
|
||||
p->argument = arg;
|
||||
p->next = next;
|
||||
p->tracerec = tracerec;
|
||||
p->priority = priority;
|
||||
p->AuxFunc = AuxFunc;
|
||||
p->buf2 = buf2;
|
||||
#ifdef SIMULATE
|
||||
p->owner = rf_GetCurrentOwner();
|
||||
#endif /* SIMULATE */
|
||||
p->raidPtr = raidPtr;
|
||||
p->flags = flags;
|
||||
#ifdef KERNEL
|
||||
p->b_proc = kb_proc;
|
||||
#endif /* KERNEL */
|
||||
return(p);
|
||||
}
|
||||
|
||||
void rf_FreeDiskQueueData(p)
|
||||
RF_DiskQueueData_t *p;
|
||||
{
|
||||
RF_FREELIST_FREE_CLEAN(rf_dqd_freelist,p,next,clean_dqd);
|
||||
}
|
|
@ -0,0 +1,310 @@
|
|||
/* $NetBSD: rf_diskqueue.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* rf_diskqueue.h -- header file for disk queues
|
||||
*
|
||||
* see comments in rf_diskqueue.c
|
||||
*
|
||||
****************************************************************************************/
|
||||
/*
|
||||
*
|
||||
* :
|
||||
*
|
||||
* Log: rf_diskqueue.h,v
|
||||
* Revision 1.31 1996/08/07 21:08:49 jimz
|
||||
* b_proc -> kb_proc (IRIX complained)
|
||||
*
|
||||
* Revision 1.30 1996/06/18 20:53:11 jimz
|
||||
* fix up disk queueing (remove configure routine,
|
||||
* add shutdown list arg to create routines)
|
||||
*
|
||||
* Revision 1.29 1996/06/13 20:38:19 jimz
|
||||
* fix queue type in DiskQueueData
|
||||
*
|
||||
* Revision 1.28 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.27 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.26 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.25 1996/06/06 17:29:12 jimz
|
||||
* track arm position of last I/O dequeued
|
||||
*
|
||||
* Revision 1.24 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.23 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.22 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.21 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.20 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.19 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.18 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.17 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.16 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.15 1996/05/10 19:39:31 jimz
|
||||
* add prev pointer to DiskQueueData
|
||||
*
|
||||
* Revision 1.14 1996/05/10 16:24:04 jimz
|
||||
* mark old defines as deprecated, add RF_ defines
|
||||
*
|
||||
* Revision 1.13 1995/12/01 15:59:04 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.12 1995/11/07 16:26:44 wvcii
|
||||
* added Peek() function to diskqueuesw
|
||||
*
|
||||
* Revision 1.11 1995/10/05 02:33:15 jimz
|
||||
* made queue lens longs (less instructions to read :-)
|
||||
*
|
||||
* Revision 1.10 1995/10/04 07:07:07 wvcii
|
||||
* queue->numOutstanding now valid for user & sim
|
||||
* user tested & verified, sim untested
|
||||
*
|
||||
* Revision 1.9 1995/09/12 00:21:37 wvcii
|
||||
* added support for tracing disk queue time
|
||||
*
|
||||
* Revision 1.8 95/04/24 13:25:51 holland
|
||||
* rewrite to move disk queues, recon, & atomic RMW to kernel
|
||||
*
|
||||
* Revision 1.6.10.2 1995/04/03 20:13:56 holland
|
||||
* added numOutstanding and maxOutstanding to support moving
|
||||
* disk queues into kernel code
|
||||
*
|
||||
* Revision 1.6.10.1 1995/04/03 20:03:56 holland
|
||||
* initial checkin on branch
|
||||
*
|
||||
* Revision 1.6 1995/03/03 18:34:33 rachad
|
||||
* Simulator mechanism added
|
||||
*
|
||||
* Revision 1.5 1995/03/01 20:25:48 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.4 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.3 1995/02/01 14:25:19 holland
|
||||
* began changes for kernelization:
|
||||
* changed all instances of mutex_t and cond_t to DECLARE macros
|
||||
* converted configuration code to use config structure
|
||||
*
|
||||
* Revision 1.2 1994/11/29 20:36:02 danner
|
||||
* Added symbolic constants for io_type (e.g,IO_TYPE_READ)
|
||||
* and support for READ_OP_WRITE
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _RF__RF_DISKQUEUE_H_
|
||||
#define _RF__RF_DISKQUEUE_H_
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_etimer.h"
|
||||
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
#include "rf_netbsd.h"
|
||||
#endif
|
||||
|
||||
|
||||
#define RF_IO_NORMAL_PRIORITY 1
|
||||
#define RF_IO_LOW_PRIORITY 0
|
||||
|
||||
/* the data held by a disk queue entry */
|
||||
struct RF_DiskQueueData_s {
|
||||
RF_SectorNum_t sectorOffset; /* sector offset into the disk */
|
||||
RF_SectorCount_t numSector; /* number of sectors to read/write */
|
||||
RF_IoType_t type; /* read/write/nop */
|
||||
caddr_t buf; /* buffer pointer */
|
||||
RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this access is for */
|
||||
RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */
|
||||
int priority; /* the priority of this request */
|
||||
int (*CompleteFunc)(void *,int);/* function to be called upon completion */
|
||||
int (*AuxFunc)(void *,...); /* function called upon completion of the first I/O of a Read_Op_Write pair*/
|
||||
void *argument; /* argument to be passed to CompleteFunc */
|
||||
#ifdef SIMULATE
|
||||
RF_Owner_t owner; /* which task is responsible for this request */
|
||||
#endif /* SIMULATE */
|
||||
void *raidPtr; /* needed for simulation */
|
||||
RF_AccTraceEntry_t *tracerec; /* perf mon only */
|
||||
RF_Etimer_t qtime; /* perf mon only - time request is in queue */
|
||||
long entryTime;
|
||||
RF_DiskQueueData_t *next;
|
||||
RF_DiskQueueData_t *prev;
|
||||
caddr_t buf2; /* for read-op-write */
|
||||
dev_t dev; /* the device number for in-kernel version */
|
||||
RF_DiskQueue_t *queue; /* the disk queue to which this req is targeted */
|
||||
RF_DiskQueueDataFlags_t flags; /* flags controlling operation */
|
||||
|
||||
#ifdef KERNEL
|
||||
struct proc *b_proc; /* the b_proc from the original bp passed into the driver for this I/O */
|
||||
struct buf *bp; /* a bp to use to get this I/O done */
|
||||
#endif /* KERNEL */
|
||||
};
|
||||
|
||||
#define RF_LOCK_DISK_QUEUE 0x01
|
||||
#define RF_UNLOCK_DISK_QUEUE 0x02
|
||||
|
||||
/* note: "Create" returns type-specific queue header pointer cast to (void *) */
|
||||
struct RF_DiskQueueSW_s {
|
||||
RF_DiskQueueType_t queueType;
|
||||
void *(*Create)(RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- one call per queue in system */
|
||||
void (*Enqueue)(void *,RF_DiskQueueData_t * ,int); /* enqueue routine */
|
||||
RF_DiskQueueData_t *(*Dequeue)(void *); /* dequeue routine */
|
||||
RF_DiskQueueData_t *(*Peek)(void *); /* peek at head of queue */
|
||||
|
||||
/* the rest are optional: they improve performance, but the driver will deal with it if they don't exist */
|
||||
int (*Promote)(void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of tagged accesses */
|
||||
};
|
||||
|
||||
struct RF_DiskQueue_s {
|
||||
RF_DiskQueueSW_t *qPtr; /* access point to queue functions */
|
||||
void *qHdr; /* queue header, of whatever type */
|
||||
RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */
|
||||
RF_DECLARE_COND(cond) /* condition variable for synchronization */
|
||||
long numOutstanding; /* number of I/Os currently outstanding on disk */
|
||||
long maxOutstanding; /* max # of I/Os that can be outstanding on a disk (in-kernel only) */
|
||||
int curPriority; /* the priority of accs all that are currently outstanding */
|
||||
long queueLength; /* number of requests in queue */
|
||||
RF_DiskQueueData_t *nextLockingOp; /* a locking op that has arrived at the head of the queue & is waiting for drainage */
|
||||
RF_DiskQueueData_t *unlockingOp; /* used at user level to communicate unlocking op b/w user (or dag exec) & disk threads */
|
||||
int numWaiting; /* number of threads waiting on this variable. user-level only */
|
||||
RF_DiskQueueFlags_t flags; /* terminate, locked */
|
||||
RF_Raid_t *raidPtr; /* associated array */
|
||||
dev_t dev; /* device number for kernel version */
|
||||
RF_SectorNum_t last_deq_sector; /* last sector number dequeued or dispatched */
|
||||
int row, col; /* debug only */
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
struct raidcinfo *rf_cinfo; /* disks component info.. */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* RF_DiskQueue_s.flags bit: no new accs allowed until queue is explicitly unlocked */
#define RF_DQ_LOCKED  0x02

/*
 * Macros setting & returning information about queues and requests.
 * _q is a pointer to an RF_DiskQueue_t, _r a pointer to an
 * RF_DiskQueueData_t.  Every expansion is fully parenthesized so it can be
 * used inside a larger expression.
 */

/* non-zero if the queue is locked */
#define RF_QUEUE_LOCKED(_q)  ((_q)->flags & RF_DQ_LOCKED)
/* nothing outstanding, no locking op waiting for drainage, and not locked */
#define RF_QUEUE_EMPTY(_q)   (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q))
/*
 * The disk has (at least) as many I/Os outstanding as it is allowed.
 * ">=" rather than "==": unlocking requests are dispatched without
 * consulting this limit, so a count above the limit must still read as
 * "full".
 */
#define RF_QUEUE_FULL(_q)    ((_q)->numOutstanding >= (_q)->maxOutstanding)

/* set / clear the locked flag; the value is the updated flags word */
#define RF_LOCK_QUEUE(_q)    ((_q)->flags |= RF_DQ_LOCKED)
#define RF_UNLOCK_QUEUE(_q)  ((_q)->flags &= ~RF_DQ_LOCKED)

/* take / release the queue's mutex; _wh_ names the caller and is not used */
#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_)   RF_LOCK_MUTEX((_q_)->mutex)
#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex)

/* non-zero if the request carries the lock / unlock flag */
#define RF_LOCKING_REQ(_r)   ((_r)->flags & RF_LOCK_DISK_QUEUE)
#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE)

/* whether it is ok to dispatch a regular request */
#define RF_OK_TO_DISPATCH(_q_,_r_) \
  (RF_QUEUE_EMPTY(_q_) || \
    (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority)))
|
||||
|
||||
int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp);
|
||||
|
||||
void rf_TerminateDiskQueues(RF_Raid_t *raidPtr);
|
||||
|
||||
int rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
|
||||
void rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri);
|
||||
|
||||
#if !defined(KERNEL) && !defined(SIMULATE)
|
||||
void rf_BroadcastOnQueue(RF_DiskQueue_t *queue);
|
||||
#endif /* !KERNEL && !SIMULATE */
|
||||
|
||||
#ifndef KERNEL
|
||||
RF_DiskQueueData_t *rf_DiskIODequeue(RF_DiskQueue_t *queue);
|
||||
#else /* !KERNEL */
|
||||
void rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status);
|
||||
#endif /* !KERNEL */
|
||||
|
||||
int rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
|
||||
RF_ReconUnitNum_t which_ru);
|
||||
|
||||
RF_DiskQueueData_t *rf_CreateDiskQueueData(RF_IoType_t typ,
|
||||
RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf,
|
||||
RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru,
|
||||
int (*wakeF)(void *, int),
|
||||
void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec,
|
||||
void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc);
|
||||
|
||||
RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t typ,
|
||||
RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf,
|
||||
RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru,
|
||||
int (*wakeF)(void *, int),
|
||||
void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec,
|
||||
int priority, int (*AuxFunc)(void *,...), caddr_t buf2,
|
||||
void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc);
|
||||
|
||||
void rf_FreeDiskQueueData(RF_DiskQueueData_t *p);
|
||||
|
||||
#endif /* !_RF__RF_DISKQUEUE_H_ */
|
|
@ -0,0 +1,632 @@
|
|||
/* $NetBSD: rf_disks.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/***************************************************************
|
||||
* rf_disks.c -- code to perform operations on the actual disks
|
||||
***************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_disks.c,v
|
||||
* Revision 1.32 1996/07/27 18:40:24 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.31 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.30 1996/07/19 16:11:21 jimz
|
||||
* pass devname to DoReadCapacity
|
||||
*
|
||||
* Revision 1.29 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.28 1996/07/10 22:28:38 jimz
|
||||
* get rid of obsolete row statuses (dead,degraded2)
|
||||
*
|
||||
* Revision 1.27 1996/06/10 12:06:14 jimz
|
||||
* don't do any SCSI op stuff in simulator at all
|
||||
*
|
||||
* Revision 1.26 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.25 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.24 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.23 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.22 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.21 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.20 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.19 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.18 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.17 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.16 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.15 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.14 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.13 1996/05/02 14:57:43 jimz
|
||||
* initialize sectorMask
|
||||
*
|
||||
* Revision 1.12 1995/12/01 15:57:04 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_general.h"
|
||||
#if !defined(__NetBSD__)
|
||||
#include "rf_camlayer.h"
|
||||
#endif
|
||||
#include "rf_options.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
int raidlookup __P((char *, struct proc *p, struct vnode **));
|
||||
#endif
|
||||
|
||||
#ifdef SIMULATE
|
||||
static char disk_db_file_name[120], disk_type_name[120];
|
||||
static double init_offset;
|
||||
#endif /* SIMULATE */
|
||||
|
||||
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * Wrapped in do { } while (0) so that each expands to exactly one
 * statement and cannot capture an "else" that follows the call site.
 */
#define DPRINTF6(a,b,c,d,e,f)   do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
|
||||
|
||||
#include "rf_ccmn.h"
|
||||
|
||||
/****************************************************************************************
|
||||
*
|
||||
* initialize the disks comprising the array
|
||||
*
|
||||
* We want the spare disks to have regular row,col numbers so that we can easily
|
||||
* substitute a spare for a failed disk. But, the driver code assumes throughout
|
||||
* that the array contains numRow by numCol _non-spare_ disks, so it's not clear
|
||||
* how to fit in the spares. This is an unfortunate holdover from raidSim. The
|
||||
* quick and dirty fix is to make row zero bigger than the rest, and put all the
|
||||
* spares in it. This probably needs to get changed eventually.
|
||||
*
|
||||
***************************************************************************************/
|
||||
int rf_ConfigureDisks(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidDisk_t **disks;
|
||||
RF_SectorCount_t min_numblks = (RF_SectorCount_t)0x7FFFFFFFFFFFLL;
|
||||
RF_RowCol_t r, c;
|
||||
int bs, ret;
|
||||
unsigned i, count, foundone=0, numFailuresThisRow;
|
||||
RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
|
||||
int num_rows_done,num_cols_done;
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
struct proc *proc = 0;
|
||||
#endif
|
||||
#ifndef SIMULATE
|
||||
#ifndef __NetBSD__
|
||||
ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
|
||||
if (ret)
|
||||
goto fail;
|
||||
ret = rf_SCSI_AllocTUR(&tur_op);
|
||||
if (ret)
|
||||
goto fail;
|
||||
#endif /* !__NetBSD__ */
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
num_rows_done = 0;
|
||||
num_cols_done = 0;
|
||||
|
||||
|
||||
RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList);
|
||||
if (disks == NULL) {
|
||||
ret = ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
raidPtr->Disks = disks;
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
|
||||
proc = raidPtr->proc; /* Blah XXX */
|
||||
|
||||
/* get space for the device-specific stuff... */
|
||||
RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
|
||||
sizeof(struct raidcinfo *), (struct raidcinfo **),
|
||||
raidPtr->cleanupList);
|
||||
if (raidPtr->raid_cinfo == NULL) {
|
||||
ret = ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (r=0; r<raidPtr->numRow; r++) {
|
||||
numFailuresThisRow = 0;
|
||||
RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList);
|
||||
if (disks[r] == NULL) {
|
||||
ret = ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* get more space for device specific stuff.. */
|
||||
RF_CallocAndAdd(raidPtr->raid_cinfo[r],
|
||||
raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0),
|
||||
sizeof(struct raidcinfo), (struct raidcinfo *),
|
||||
raidPtr->cleanupList);
|
||||
if (raidPtr->raid_cinfo[r] == NULL) {
|
||||
ret = ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
|
||||
for (c=0; c<raidPtr->numCol; c++) {
|
||||
ret = rf_ConfigureDisk(raidPtr,&cfgPtr->devnames[r][c][0],
|
||||
&disks[r][c], rdcap_op, tur_op,
|
||||
cfgPtr->devs[r][c],r,c);
|
||||
if (ret)
|
||||
goto fail;
|
||||
if (disks[r][c].status != rf_ds_optimal) {
|
||||
numFailuresThisRow++;
|
||||
}
|
||||
else {
|
||||
if (disks[r][c].numBlocks < min_numblks)
|
||||
min_numblks = disks[r][c].numBlocks;
|
||||
DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
|
||||
r,c,disks[r][c].devname,
|
||||
(long int) disks[r][c].numBlocks,
|
||||
disks[r][c].blockSize,
|
||||
(long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024);
|
||||
}
|
||||
num_cols_done++;
|
||||
}
|
||||
/* XXX fix for n-fault tolerant */
|
||||
if (numFailuresThisRow > 0)
|
||||
raidPtr->status[r] = rf_rs_degraded;
|
||||
num_rows_done++;
|
||||
}
|
||||
#ifndef SIMULATE
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* we do nothing */
|
||||
#else
|
||||
rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
|
||||
rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL;
|
||||
#endif
|
||||
#endif /* !SIMULATE */
|
||||
/* all disks must be the same size & have the same block size, bs must be a power of 2 */
|
||||
bs = 0;
|
||||
for (foundone=r=0; !foundone && r<raidPtr->numRow; r++) {
|
||||
for (c=0; !foundone && c<raidPtr->numCol; c++) {
|
||||
if (disks[r][c].status == rf_ds_optimal) {
|
||||
bs = disks[r][c].blockSize;
|
||||
foundone = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!foundone) {
|
||||
RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
|
||||
ret = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
for (count=0,i=1; i; i<<=1) if (bs & i)
|
||||
count++;
|
||||
if (count != 1) {
|
||||
RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n",bs);
|
||||
ret = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
for (r=0; r<raidPtr->numRow; r++) {
|
||||
for (c=0; c<raidPtr->numCol; c++) {
|
||||
if (disks[r][c].status == rf_ds_optimal) {
|
||||
if (disks[r][c].blockSize != bs) {
|
||||
RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n",r,c);
|
||||
ret = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
if (disks[r][c].numBlocks != min_numblks) {
|
||||
RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
|
||||
r,c,(int) min_numblks);
|
||||
disks[r][c].numBlocks = min_numblks;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
raidPtr->sectorsPerDisk = min_numblks;
|
||||
raidPtr->logBytesPerSector = ffs(bs) - 1;
|
||||
raidPtr->bytesPerSector = bs;
|
||||
raidPtr->sectorMask = bs-1;
|
||||
return(0);
|
||||
|
||||
fail:
|
||||
|
||||
#ifndef SIMULATE
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
|
||||
for(r=0;r<raidPtr->numRow;r++) {
|
||||
for(c=0;c<raidPtr->numCol;c++) {
|
||||
/* Cleanup.. */
|
||||
#ifdef DEBUG
|
||||
printf("Cleaning up row: %d col: %d\n",r,c);
|
||||
#endif
|
||||
if (raidPtr->raid_cinfo[r][c].ci_vp) {
|
||||
(void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
|
||||
FREAD|FWRITE, proc->p_ucred, proc);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Space allocated for raid_vpp will get cleaned up at some other point */
|
||||
/* XXX Need more #ifdefs in the above... */
|
||||
|
||||
#else
|
||||
|
||||
if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
|
||||
if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0);
|
||||
|
||||
#endif
|
||||
#endif /* !SIMULATE */
|
||||
return(ret);
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************************
|
||||
* set up the data structures describing the spare disks in the array
|
||||
* recall from the above comment that the spare disk descriptors are stored
|
||||
* in row zero, which is specially expanded to hold them.
|
||||
***************************************************************************************/
|
||||
int rf_ConfigureSpareDisks(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
char buf[256];
|
||||
int i, ret;
|
||||
RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
|
||||
unsigned bs;
|
||||
RF_RaidDisk_t *disks;
|
||||
int num_spares_done;
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
struct proc *proc;
|
||||
#endif
|
||||
|
||||
#ifndef SIMULATE
|
||||
#ifndef __NetBSD__
|
||||
ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
|
||||
if (ret)
|
||||
goto fail;
|
||||
ret = rf_SCSI_AllocTUR(&tur_op);
|
||||
if (ret)
|
||||
goto fail;
|
||||
#endif /* !__NetBSD__ */
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
num_spares_done = 0;
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
proc = raidPtr->proc;
|
||||
/* The space for the spares should have already been
|
||||
allocated by ConfigureDisks() */
|
||||
#endif
|
||||
|
||||
disks = &raidPtr->Disks[0][raidPtr->numCol];
|
||||
for (i=0; i<raidPtr->numSpare; i++) {
|
||||
ret = rf_ConfigureDisk(raidPtr,&cfgPtr->spare_names[i][0],
|
||||
&disks[i], rdcap_op, tur_op,
|
||||
cfgPtr->spare_devs[i],0,raidPtr->numCol+i);
|
||||
if (ret)
|
||||
goto fail;
|
||||
if (disks[i].status != rf_ds_optimal) {
|
||||
RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",buf);
|
||||
} else {
|
||||
disks[i].status = rf_ds_spare; /* change status to spare */
|
||||
DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",i,
|
||||
disks[i].devname,
|
||||
(long int) disks[i].numBlocks,disks[i].blockSize,
|
||||
(long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024);
|
||||
}
|
||||
num_spares_done++;
|
||||
}
|
||||
#ifndef SIMULATE
|
||||
#if defined(__NetBSD__) && (_KERNEL)
|
||||
|
||||
#else
|
||||
rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
|
||||
rf_SCSI_FreeDiskOp(tur_op, 0); tur_op = NULL;
|
||||
#endif
|
||||
#endif /* !SIMULATE */
|
||||
|
||||
/* check sizes and block sizes on spare disks */
|
||||
bs = 1 << raidPtr->logBytesPerSector;
|
||||
for (i=0; i<raidPtr->numSpare; i++) {
|
||||
if (disks[i].blockSize != bs) {
|
||||
RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n",disks[i].blockSize, disks[i].devname, bs);
|
||||
ret = EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
|
||||
RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
|
||||
disks[i].devname, disks[i].blockSize, (long int)raidPtr->sectorsPerDisk);
|
||||
ret = EINVAL;
|
||||
goto fail;
|
||||
} else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
|
||||
RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n",disks[i].devname, (long int) raidPtr->sectorsPerDisk);
|
||||
|
||||
disks[i].numBlocks = raidPtr->sectorsPerDisk;
|
||||
}
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
fail:
|
||||
#ifndef SIMULATE
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
|
||||
for(i=0;i<raidPtr->numSpare;i++) {
|
||||
/* Cleanup.. */
|
||||
#ifdef DEBUG
|
||||
printf("Cleaning up spare: %d\n",i);
|
||||
#endif
|
||||
if (raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp) {
|
||||
(void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp,
|
||||
FREAD|FWRITE, proc->p_ucred, proc);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
|
||||
if (tur_op) rf_SCSI_FreeDiskOp(tur_op, 0);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* !SIMULATE */
|
||||
return(ret);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* configure a single disk in the array */
|
||||
int rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col)
|
||||
RF_Raid_t *raidPtr; /* We need this down here too!! GO */
|
||||
char *buf;
|
||||
RF_RaidDisk_t *diskPtr;
|
||||
RF_DiskOp_t *rdcap_op;
|
||||
RF_DiskOp_t *tur_op;
|
||||
dev_t dev; /* device number used only in kernel */
|
||||
RF_RowCol_t row;
|
||||
RF_RowCol_t col;
|
||||
{
|
||||
char *p;
|
||||
#ifdef SIMULATE
|
||||
double init_offset;
|
||||
#else /* SIMULATE */
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
int retcode;
|
||||
#else
|
||||
int busid, targid, lun, retcode;
|
||||
#endif
|
||||
#endif /* SIMULATE */
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
struct partinfo dpart;
|
||||
struct vnode *vp;
|
||||
struct vattr va;
|
||||
struct proc *proc;
|
||||
int error;
|
||||
#endif
|
||||
|
||||
retcode = 0;
|
||||
p = rf_find_non_white(buf);
|
||||
if (p[strlen(p)-1] == '\n') {
|
||||
/* strip off the newline */
|
||||
p[strlen(p)-1] = '\0';
|
||||
}
|
||||
(void) strcpy(diskPtr->devname, p);
|
||||
|
||||
#ifdef SIMULATE
|
||||
|
||||
init_offset = 0.0;
|
||||
rf_InitDisk(&diskPtr->diskState, disk_db_file_name,diskPtr->devname,0,0,init_offset,row,col);
|
||||
rf_GeometryDoReadCapacity(&diskPtr->diskState, &diskPtr->numBlocks, &diskPtr->blockSize);
|
||||
diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
|
||||
|
||||
/* we allow the user to specify that only a fraction of the disks should be used
|
||||
* this is just for debug: it speeds up the parity scan
|
||||
*/
|
||||
|
||||
#else /* SIMULATE */
|
||||
#ifndef __NetBSD__
|
||||
/* get bus, target, lun */
|
||||
retcode = rf_extract_ids(p, &busid, &targid, &lun);
|
||||
if (retcode)
|
||||
return(retcode);
|
||||
|
||||
/* required in kernel, nop at user level */
|
||||
retcode = rf_SCSI_OpenUnit(dev);
|
||||
if (retcode)
|
||||
return(retcode);
|
||||
|
||||
diskPtr->dev = dev;
|
||||
if (rf_SCSI_DoTUR(tur_op, (u_char)busid, (u_char)targid, (u_char)lun, dev)) {
|
||||
RF_ERRORMSG1("Disk %s failed TUR. Marked as dead.\n",diskPtr->devname);
|
||||
diskPtr->status = rf_ds_failed;
|
||||
} else {
|
||||
diskPtr->status = rf_ds_optimal;
|
||||
retcode = rf_SCSI_DoReadCapacity(raidPtr,rdcap_op, busid, targid, lun, dev,
|
||||
&diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname);
|
||||
if (retcode)
|
||||
return(retcode);
|
||||
|
||||
/* we allow the user to specify that only a fraction of the disks should be used
|
||||
* this is just for debug: it speeds up the parity scan
|
||||
*/
|
||||
diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
|
||||
}
|
||||
#endif
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
|
||||
proc = raidPtr->proc; /* XXX Yes, this is not nice.. */
|
||||
|
||||
/* Let's start by claiming the component is fine and well... */
|
||||
/* XXX not the case if the disk is toast.. */
|
||||
diskPtr->status = rf_ds_optimal;
|
||||
|
||||
|
||||
raidPtr->raid_cinfo[row][col].ci_vp = NULL;
|
||||
raidPtr->raid_cinfo[row][col].ci_dev = NULL;
|
||||
|
||||
error = raidlookup(diskPtr->devname, proc, &vp);
|
||||
if (error) {
|
||||
printf("raidlookup on device: %s failed!\n",diskPtr->devname);
|
||||
if (error == ENXIO) {
|
||||
/* XXX the component isn't there... must be dead :-( */
|
||||
diskPtr->status = rf_ds_failed;
|
||||
} else {
|
||||
return(error);
|
||||
}
|
||||
}
|
||||
|
||||
if (diskPtr->status == rf_ds_optimal) {
|
||||
|
||||
if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
|
||||
return(error);
|
||||
}
|
||||
|
||||
error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
|
||||
FREAD, proc->p_ucred, proc);
|
||||
if (error) {
|
||||
return(error);
|
||||
}
|
||||
|
||||
|
||||
diskPtr->blockSize = dpart.disklab->d_secsize;
|
||||
|
||||
diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
|
||||
|
||||
raidPtr->raid_cinfo[row][col].ci_vp = vp;
|
||||
raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
|
||||
|
||||
#if 0
|
||||
diskPtr->dev = dev;
|
||||
#endif
|
||||
|
||||
diskPtr->dev = va.va_rdev; /* XXX or the above? */
|
||||
|
||||
/* we allow the user to specify that only a fraction of the disks should be used
|
||||
* this is just for debug: it speeds up the parity scan
|
||||
*/
|
||||
diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
|
||||
|
||||
}
|
||||
|
||||
#endif /* !__NetBSD__ */
|
||||
#endif /* SIMULATE */
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
#ifdef SIMULATE
|
||||
|
||||
void rf_default_disk_names()
|
||||
{
|
||||
sprintf(disk_db_file_name,"disk.db");
|
||||
sprintf(disk_type_name,"HP2247");
|
||||
}
|
||||
|
||||
void rf_set_disk_db_name(s)
|
||||
char *s;
|
||||
{
|
||||
strcpy(disk_db_file_name,s);
|
||||
}
|
||||
|
||||
void rf_set_disk_type_name(s)
|
||||
char *s;
|
||||
{
|
||||
strcpy(disk_type_name,s);
|
||||
}
|
||||
|
||||
#endif /* SIMULATE */
|
|
@ -0,0 +1,160 @@
|
|||
/* $NetBSD: rf_disks.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_disks.h -- header file for code related to physical disks
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_disks.h,v
|
||||
* Revision 1.15 1996/08/20 23:05:13 jimz
|
||||
* add nreads, nwrites to RaidDisk
|
||||
*
|
||||
* Revision 1.14 1996/06/17 03:20:15 jimz
|
||||
* increase devname len to 56
|
||||
*
|
||||
* Revision 1.13 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.12 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.11 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.10 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.9 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.8 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.7 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.6 1996/05/02 22:06:57 jimz
|
||||
* add RF_RaidDisk_t
|
||||
*
|
||||
* Revision 1.5 1995/12/01 15:56:53 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DISKS_H_
|
||||
#define _RF__RF_DISKS_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
#ifdef SIMULATE
|
||||
#include "rf_geometry.h"
|
||||
#endif /* SIMULATE */
|
||||
|
||||
/*
|
||||
* A physical disk can be in one of several states:
|
||||
* IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW.
|
||||
*/
|
||||
enum RF_DiskStatus_e {
|
||||
rf_ds_optimal, /* no problems */
|
||||
rf_ds_failed, /* disk is dead (e.g. failed its probe or is missing); counted by RF_DEAD_DISK() */
|
||||
rf_ds_reconstructing, /* reconstruction ongoing */
|
||||
rf_ds_dist_spared, /* reconstruction complete to distributed spare space, dead disk not yet replaced */
|
||||
rf_ds_spared, /* reconstruction complete to spare disk, dead disk not yet replaced */
|
||||
rf_ds_spare, /* an available spare disk */
|
||||
rf_ds_used_spare /* a spare which has been used, and hence is not available */
|
||||
};
|
||||
typedef enum RF_DiskStatus_e RF_DiskStatus_t;
|
||||
|
||||
struct RF_RaidDisk_s {
|
||||
char devname[56]; /* name of device file */
|
||||
RF_DiskStatus_t status; /* whether it is up or down */
|
||||
RF_RowCol_t spareRow; /* if in status "spared", this identifies the spare disk */
|
||||
RF_RowCol_t spareCol; /* if in status "spared", this identifies the spare disk */
|
||||
RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ CAPACITY */
|
||||
int blockSize;
|
||||
/* XXX the following is needed since we seem to need SIMULATE defined
|
||||
in order to get user-land stuff to compile, but we *don't* want
|
||||
this in the structure for the user-land utilities, as the
|
||||
kernel doesn't know about it!! (and it messes up the size of
|
||||
the structure, so there is a communication problem between
|
||||
the kernel and the userland utils :-( GO */
|
||||
#if defined(SIMULATE) && !defined(RF_UTILITY)
|
||||
RF_DiskState_t diskState; /* the name of the disk as used in the disk module */
|
||||
#endif /* SIMULATE */
|
||||
#if RF_KEEP_DISKSTATS > 0
|
||||
RF_uint64 nreads;
|
||||
RF_uint64 nwrites;
|
||||
#endif /* RF_KEEP_DISKSTATS > 0 */
|
||||
dev_t dev;
|
||||
};
|
||||
|
||||
/*
|
||||
* An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want
|
||||
* to isolate the cam layer from all other layers, so I typecast to/from
|
||||
* RF_DiskOp_t * (i.e. void *) at the interfaces.
|
||||
*/
|
||||
typedef void RF_DiskOp_t;
|
||||
|
||||
/* if a disk is in any of these states, it is inaccessible */
|
||||
#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \
|
||||
((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \
|
||||
((_dstat_) == rf_ds_dist_spared))
|
||||
|
||||
int rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
|
||||
RF_DiskOp_t *rdcap_op, RF_DiskOp_t *tur_op, dev_t dev,
|
||||
RF_RowCol_t row, RF_RowCol_t col);
|
||||
|
||||
#ifdef SIMULATE
|
||||
void rf_default_disk_names(void);
|
||||
void rf_set_disk_db_name(char *s);
|
||||
void rf_set_disk_type_name(char *s);
|
||||
#endif /* SIMULATE */
|
||||
|
||||
#endif /* !_RF__RF_DISKS_H_ */
|
|
@ -0,0 +1,102 @@
|
|||
/* $NetBSD: rf_diskthreads.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* rf_diskthreads.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_diskthreads.h,v
|
||||
* Revision 1.7 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.6 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.5 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.4 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.3 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:55:58 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* rf_diskthreads.h -- types and prototypes for disk thread system
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DISKTHREADS_H_
|
||||
#define _RF__RF_DISKTHREADS_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/* this is the information that a disk thread needs to do its job */
|
||||
struct RF_DiskId_s {
|
||||
RF_DiskQueue_t *queue;
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RaidDisk_t *disk;
|
||||
int fd; /* file descriptor */
|
||||
RF_RowCol_t row, col; /* debug only */
|
||||
#ifdef SIMULATE
|
||||
int state;
|
||||
#endif /* SIMULATE */
|
||||
};
|
||||
|
||||
int rf_ConfigureDiskThreads(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
|
||||
#ifdef SIMULATE
|
||||
int rf_SetDiskIdle(RF_Raid_t *raidPtr, RF_RowCol_t r, RF_RowCol_t c);
|
||||
int rf_ScanDiskQueues(RF_Raid_t *raidPtr);
|
||||
void rf_simulator_complete_io(RF_DiskId_t *id);
|
||||
void rf_PrintDiskStat(RF_Raid_t *raidPtr);
|
||||
#else /* SIMULATE */
|
||||
int rf_ShutdownDiskThreads(RF_Raid_t *raidPtr);
|
||||
#endif /* SIMULATE */
|
||||
|
||||
#endif /* !_RF__RF_DISKTHREADS_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,125 @@
|
|||
/* $NetBSD: rf_driver.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* rf_driver.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_driver.h,v
|
||||
* Revision 1.11 1996/07/11 19:08:00 jimz
|
||||
* generalize reconstruction mechanism
|
||||
* allow raid1 reconstructs via copyback (done with array
|
||||
* quiesced, not online, therefore not disk-directed)
|
||||
*
|
||||
* Revision 1.10 1996/06/10 14:18:58 jimz
|
||||
* move user, throughput stats into per-array structure
|
||||
*
|
||||
* Revision 1.9 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.8 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.7 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.6 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.5 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.4 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.3 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:56:10 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_DRIVER_H_
|
||||
#define _RF__RF_DRIVER_H_
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_types.h"
|
||||
|
||||
RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex)
|
||||
|
||||
int rf_BootRaidframe(void);
|
||||
int rf_UnbootRaidframe(void);
|
||||
int rf_Shutdown(RF_Raid_t *raidPtr);
|
||||
int rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
|
||||
RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr,
|
||||
void *bp, RF_DagHeader_t **paramDAG, RF_AccessStripeMapHeader_t **paramASM,
|
||||
RF_RaidAccessFlags_t flags, void (*cbF)(struct buf *), void *cbA,
|
||||
RF_AccessState_t *states);
|
||||
void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc);
|
||||
int rf_DoAccess(RF_Raid_t *raidPtr, RF_IoType_t type, int async_flag,
|
||||
RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr,
|
||||
void *bp_in, RF_DagHeader_t **paramDAG,
|
||||
RF_AccessStripeMapHeader_t **paramASM, RF_RaidAccessFlags_t flags,
|
||||
RF_RaidAccessDesc_t **paramDesc, void (*cbF)(struct buf *), void *cbA);
|
||||
int rf_SetReconfiguredMode(RF_Raid_t *raidPtr, RF_RowCol_t row,
|
||||
RF_RowCol_t col);
|
||||
int rf_FailDisk(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol,
|
||||
int initRecon);
|
||||
#ifdef SIMULATE
|
||||
void rf_ScheduleContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc);
|
||||
#endif /* SIMULATE */
|
||||
void rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc);
|
||||
int rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr);
|
||||
void rf_ResumeNewRequests(RF_Raid_t *raidPtr);
|
||||
void rf_StartThroughputStats(RF_Raid_t *raidPtr);
|
||||
void rf_StartUserStats(RF_Raid_t *raidPtr);
|
||||
void rf_StopUserStats(RF_Raid_t *raidPtr);
|
||||
void rf_UpdateUserStats(RF_Raid_t *raidPtr, int rt, int numsect);
|
||||
void rf_PrintUserStats(RF_Raid_t *raidPtr);
|
||||
|
||||
#endif /* !_RF__RF_DRIVER_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,74 @@
|
|||
/* $NetBSD: rf_engine.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II, Mark Holland, Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/**********************************************************
|
||||
* *
|
||||
* engine.h -- header file for execution engine functions *
|
||||
* *
|
||||
**********************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_engine.h,v
|
||||
* Revision 1.11 1996/06/14 14:16:22 jimz
|
||||
* new decl of ConfigureEngine
|
||||
*
|
||||
* Revision 1.10 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.9 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.8 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.7 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.6 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.5 1995/12/01 18:12:17 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_ENGINE_H_
|
||||
#define _RF__RF_ENGINE_H_
|
||||
|
||||
int rf_ConfigureEngine(RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
|
||||
|
||||
int rf_FinishNode(RF_DagNode_t *node, int context); /* return finished node to engine */
|
||||
|
||||
int rf_DispatchDAG(RF_DagHeader_t *dag, void (*cbFunc)(void *), void *cbArg); /* execute dag */
|
||||
|
||||
#endif /* !_RF__RF_ENGINE_H_ */
|
|
@ -0,0 +1,352 @@
|
|||
/* $NetBSD: rf_etimer.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_etimer.h -- header file for code related to accurate timing
|
||||
* This code currently assumes that the elapsed time between START_TIMER
|
||||
* and STOP_TIMER is less than the period of the cycle counter. This
|
||||
* means the events you want to time must be less than:
|
||||
* clock speed max time
|
||||
* ---------- --------
|
||||
* 175 MHz 24 sec
|
||||
* 150 MHz 28 sec
|
||||
* 125 MHz 34 sec
|
||||
*
|
||||
*
|
||||
* :
|
||||
* Log: rf_etimer.h,v
|
||||
* Revision 1.32 1996/08/13 18:11:09 jimz
|
||||
* want MACH&&!__osf__, not just MACH for mach timing (MACH defined under OSF/1)
|
||||
*
|
||||
* Revision 1.31 1996/08/12 20:11:38 jimz
|
||||
* use read_real_time() on AIX4+
|
||||
*
|
||||
* Revision 1.30 1996/08/09 18:48:12 jimz
|
||||
* for now, use gettimeofday() on MACH
|
||||
* (should eventually use better clock stuff)
|
||||
*
|
||||
* Revision 1.29 1996/08/07 21:09:08 jimz
|
||||
* add IRIX as a gettimeofday system
|
||||
*
|
||||
* Revision 1.28 1996/08/06 22:25:23 jimz
|
||||
* add LINUX_I386
|
||||
*
|
||||
* Revision 1.27 1996/07/30 04:45:53 jimz
|
||||
* add ultrix stuff
|
||||
*
|
||||
* Revision 1.26 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.25 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.24 1996/07/27 18:40:24 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.23 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.22 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.21 1996/07/17 21:00:58 jimz
|
||||
* clean up timer interface, tracing
|
||||
*
|
||||
* Revision 1.20 1996/07/17 14:26:28 jimz
|
||||
* rf_scc -> rf_rpcc
|
||||
*
|
||||
* Revision 1.19 1996/06/14 21:24:48 jimz
|
||||
* move out ConfigureEtimer
|
||||
*
|
||||
* Revision 1.18 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.17 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.16 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.15 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.14 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.13 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.12 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.11 1995/12/01 18:10:40 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.10 1995/09/29 14:27:32 wvcii
|
||||
* removed printfs from ConfigureEtimer()
|
||||
*
|
||||
* Revision 1.9 95/09/19 22:57:31 jimz
|
||||
* added kernel version of ConfigureEtimer
|
||||
*
|
||||
* Revision 1.8 1995/09/14 13:03:04 amiri
|
||||
* set default CPU speed to 125Mhz to avoid divide by zero problems.
|
||||
*
|
||||
* Revision 1.7 1995/09/11 19:04:36 wvcii
|
||||
* timer autoconfigs using pdl routine to check cpu speed
|
||||
* value may still be overridden via config debug var timerTicksPerSec
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _RF__RF_TIMER_H_
|
||||
#define _RF__RF_TIMER_H_
|
||||
|
||||
#include "rf_options.h"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#ifdef __NetBSD__
|
||||
|
||||
#ifdef KERNEL
|
||||
extern unsigned int rpcc(void);
|
||||
#define rf_read_cycle_counter rpcc
|
||||
#else /* KERNEL */
|
||||
#ifndef __NetBSD__
|
||||
/* XXX does this function even exist anywhere??? GO */
|
||||
extern unsigned int rf_rpcc();
|
||||
#endif
|
||||
#define rf_read_cycle_counter rf_rpcc
|
||||
#endif /* KERNEL */
|
||||
|
||||
#define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF
|
||||
|
||||
typedef struct RF_EtimerVal_s {
|
||||
unsigned ccnt; /* cycle count */
|
||||
} RF_EtimerVal_t;
|
||||
|
||||
struct RF_Etimer_s {
|
||||
RF_EtimerVal_t st;
|
||||
RF_EtimerVal_t et;
|
||||
unsigned long ticks; /* elapsed time in ticks */
|
||||
};
|
||||
|
||||
extern long rf_timer_max_val;
|
||||
extern long rf_timer_ticks_per_second;
|
||||
extern unsigned long rf_timer_ticks_per_usec;
|
||||
|
||||
#define RF_ETIMER_TICKS2US(_tcks_) ( (_tcks_) / rf_timer_ticks_per_usec )
|
||||
#define RF_ETIMER_START(_t_) { (_t_).st.ccnt = rf_read_cycle_counter(); }
|
||||
#define RF_ETIMER_STOP(_t_) { (_t_).et.ccnt = rf_read_cycle_counter(); }
|
||||
/*
 * RF_ETIMER_EVAL -- derive (_t_).ticks from the recorded start (st) and
 * stop (et) cycle counts.  When the stop count is not greater than the
 * start count, the difference is taken against rf_timer_max_val instead
 * (the counter is presumed to have wrapped once).
 */
#define RF_ETIMER_EVAL(_t_) {                                           \
  if ((_t_).et.ccnt <= (_t_).st.ccnt)                                   \
    (_t_).ticks = rf_timer_max_val - ((_t_).st.ccnt - (_t_).et.ccnt);   \
  else                                                                  \
    (_t_).ticks = (_t_).et.ccnt - (_t_).st.ccnt;                        \
}
|
||||
|
||||
#define RF_ETIMER_VAL_TICKS(_t_) ((_t_).ticks)
|
||||
#define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks))
|
||||
#define RF_ETIMER_VAL_MS(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)/1000)
|
||||
|
||||
#endif /* __NetBSD__ */
|
||||
|
||||
|
||||
#if defined(__alpha) && !defined(__NetBSD__)
|
||||
|
||||
#ifdef KERNEL
|
||||
extern unsigned int rpcc();
|
||||
#define rf_read_cycle_counter rpcc
|
||||
#else /* KERNEL */
|
||||
extern unsigned int rf_rpcc();
|
||||
#define rf_read_cycle_counter rf_rpcc
|
||||
#endif /* KERNEL */
|
||||
|
||||
#define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF
|
||||
|
||||
typedef struct RF_EtimerVal_s {
|
||||
unsigned ccnt; /* cycle count */
|
||||
} RF_EtimerVal_t;
|
||||
|
||||
struct RF_Etimer_s {
|
||||
RF_EtimerVal_t st;
|
||||
RF_EtimerVal_t et;
|
||||
unsigned long ticks; /* elapsed time in ticks */
|
||||
};
|
||||
|
||||
extern long rf_timer_max_val;
|
||||
extern long rf_timer_ticks_per_second;
|
||||
extern unsigned long rf_timer_ticks_per_usec;
|
||||
|
||||
#define RF_ETIMER_TICKS2US(_tcks_) ( (_tcks_) / rf_timer_ticks_per_usec )
|
||||
#define RF_ETIMER_START(_t_) { (_t_).st.ccnt = rf_read_cycle_counter(); }
|
||||
#define RF_ETIMER_STOP(_t_) { (_t_).et.ccnt = rf_read_cycle_counter(); }
|
||||
/*
 * RF_ETIMER_EVAL -- derive (_t_).ticks from the recorded start (st) and
 * stop (et) cycle counts.  When the stop count is not greater than the
 * start count, the difference is taken against rf_timer_max_val instead
 * (the counter is presumed to have wrapped once).
 */
#define RF_ETIMER_EVAL(_t_) {                                           \
  if ((_t_).et.ccnt <= (_t_).st.ccnt)                                   \
    (_t_).ticks = rf_timer_max_val - ((_t_).st.ccnt - (_t_).et.ccnt);   \
  else                                                                  \
    (_t_).ticks = (_t_).et.ccnt - (_t_).st.ccnt;                        \
}
|
||||
|
||||
#define RF_ETIMER_VAL_TICKS(_t_) ((_t_).ticks)
|
||||
#define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks))
|
||||
#define RF_ETIMER_VAL_MS(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)/1000)
|
||||
|
||||
#endif /* __alpha */
|
||||
|
||||
#ifdef _IBMR2
|
||||
|
||||
extern void rf_rtclock(unsigned int *secs, unsigned int *nsecs);
|
||||
|
||||
#define RF_MSEC_PER_SEC 1000
|
||||
#define RF_USEC_PER_SEC 1000000
|
||||
#define RF_NSEC_PER_SEC 1000000000
|
||||
|
||||
typedef struct RF_EtimerVal_s {
|
||||
unsigned int secs;
|
||||
unsigned int nsecs;
|
||||
} RF_EtimerVal_t;
|
||||
|
||||
struct RF_Etimer_s {
|
||||
RF_EtimerVal_t start;
|
||||
RF_EtimerVal_t end;
|
||||
RF_EtimerVal_t elapsed;
|
||||
};
|
||||
|
||||
#if RF_AIXVERS >= 4
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#define RF_ETIMER_START(_t_) { \
|
||||
timebasestruct_t tb; \
|
||||
tb.flag = 1; \
|
||||
read_real_time(&tb, TIMEBASE_SZ); \
|
||||
(_t_).start.secs = tb.tb_high; \
|
||||
(_t_).start.nsecs = tb.tb_low; \
|
||||
}
|
||||
|
||||
#define RF_ETIMER_STOP(_t_) { \
|
||||
timebasestruct_t tb; \
|
||||
tb.flag = 1; \
|
||||
read_real_time(&tb, TIMEBASE_SZ); \
|
||||
(_t_).end.secs = tb.tb_high; \
|
||||
(_t_).end.nsecs = tb.tb_low; \
|
||||
}
|
||||
|
||||
#else /* RF_AIXVERS >= 4 */
|
||||
|
||||
#define RF_ETIMER_START(_t_) { \
|
||||
rf_rtclock(&((_t_).start.secs), &((_t_).start.nsecs)); \
|
||||
}
|
||||
|
||||
#define RF_ETIMER_STOP(_t_) { \
|
||||
rf_rtclock(&((_t_).end.secs), &((_t_).end.nsecs)); \
|
||||
}
|
||||
|
||||
#endif /* RF_AIXVERS >= 4 */
|
||||
|
||||
/*
 * RF_ETIMER_EVAL -- compute (_t_).elapsed = (_t_).end - (_t_).start.
 *
 * start, end and elapsed are (secs, nsecs) pairs.  When end.nsecs is
 * smaller than start.nsecs, one second is borrowed: RF_NSEC_PER_SEC is
 * added to the nanosecond difference and the seconds difference is
 * reduced by one.  (Assumes both inputs carry nsecs below
 * RF_NSEC_PER_SEC and that end is not earlier than start.)
 */
#define RF_ETIMER_EVAL(_t_) { \
  if ((_t_).end.nsecs >= (_t_).start.nsecs) { \
    (_t_).elapsed.nsecs = (_t_).end.nsecs - (_t_).start.nsecs; \
    /* seconds minus seconds (was mistakenly start.nsecs) */ \
    (_t_).elapsed.secs = (_t_).end.secs - (_t_).start.secs; \
  } \
  else { \
    (_t_).elapsed.nsecs = RF_NSEC_PER_SEC + (_t_).end.nsecs; \
    (_t_).elapsed.nsecs -= (_t_).start.nsecs; \
    /* pay back the borrowed second (was mistakenly "+ 1") */ \
    (_t_).elapsed.secs = (_t_).end.secs - (_t_).start.secs - 1; \
  } \
}
|
||||
|
||||
#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.secs*RF_USEC_PER_SEC)+((_t_).elapsed.nsecs/1000))
|
||||
#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.secs*RF_MSEC_PER_SEC)+((_t_).elapsed.nsecs/1000000))
|
||||
|
||||
#endif /* _IBMR2 */
|
||||
|
||||
/*
|
||||
* XXX investigate better timing for these
|
||||
*/
|
||||
#if defined(hpux) || defined(sun) || defined(NETBSD_I386) || defined(ultrix) || defined(LINUX_I386) || defined(IRIX) || (defined(MACH) && !defined(__osf__))
|
||||
#include <sys/time.h>
|
||||
|
||||
#define RF_USEC_PER_SEC 1000000
|
||||
|
||||
struct RF_Etimer_s {
|
||||
struct timeval start;
|
||||
struct timeval end;
|
||||
struct timeval elapsed;
|
||||
};
|
||||
#ifndef __NetBSD__
|
||||
#define RF_ETIMER_START(_t_) { \
|
||||
gettimeofday(&((_t_).start), NULL); \
|
||||
}
|
||||
|
||||
#define RF_ETIMER_STOP(_t_) { \
|
||||
gettimeofday(&((_t_).end), NULL); \
|
||||
}
|
||||
|
||||
#else
|
||||
#define RF_ETIMER_START(_t_) { \
|
||||
}
|
||||
/* XXX these just drop off the end of the world... */
|
||||
#define RF_ETIMER_STOP(_t_) { \
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * RF_ETIMER_EVAL -- compute (_t_).elapsed = (_t_).end - (_t_).start.
 *
 * start, end and elapsed are struct timeval values.  When end.tv_usec is
 * smaller than start.tv_usec, one second is borrowed: RF_USEC_PER_SEC is
 * added to the microsecond difference and the seconds difference is
 * reduced by one.  (Assumes both inputs carry tv_usec below
 * RF_USEC_PER_SEC and that end is not earlier than start.)
 */
#define RF_ETIMER_EVAL(_t_) { \
  if ((_t_).end.tv_usec >= (_t_).start.tv_usec) { \
    (_t_).elapsed.tv_usec = (_t_).end.tv_usec - (_t_).start.tv_usec; \
    /* seconds minus seconds (was mistakenly start.tv_usec) */ \
    (_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_sec; \
  } \
  else { \
    (_t_).elapsed.tv_usec = RF_USEC_PER_SEC + (_t_).end.tv_usec; \
    (_t_).elapsed.tv_usec -= (_t_).start.tv_usec; \
    /* pay back the borrowed second (was mistakenly "+ 1") */ \
    (_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_sec - 1; \
  } \
}

/*
 * RF_ETIMER_VAL_MS below needs RF_MSEC_PER_SEC; supply it here unless
 * some other section has already defined it.
 */
#ifndef RF_MSEC_PER_SEC
#define RF_MSEC_PER_SEC 1000
#endif

/* elapsed time, as computed by RF_ETIMER_EVAL, in microseconds / milliseconds */
#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.tv_sec*RF_USEC_PER_SEC)+(_t_).elapsed.tv_usec)
#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.tv_sec*RF_MSEC_PER_SEC)+((_t_).elapsed.tv_usec/1000))
|
||||
|
||||
#endif /* hpux || sun || NETBSD_I386 || ultrix || LINUX_I386 || IRIX || (MACH && !__osf__) */
|
||||
|
||||
#endif /* !_RF__RF_TIMER_H_ */
|
|
@ -0,0 +1,555 @@
|
|||
/* $NetBSD: rf_evenodd.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Chang-Ming Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* rf_evenodd.c -- implements EVENODD array architecture
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
#include "rf_archs.h"
|
||||
|
||||
#if RF_INCLUDE_EVENODD > 0
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagffrd.h"
|
||||
#include "rf_dagffwr.h"
|
||||
#include "rf_dagdegrd.h"
|
||||
#include "rf_dagdegwr.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_evenodd.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_parityscan.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_map.h"
|
||||
#include "rf_pq.h"
|
||||
#include "rf_mcpair.h"
|
||||
#include "rf_sys.h"
|
||||
#include "rf_evenodd.h"
|
||||
#include "rf_evenodd_dagfuncs.h"
|
||||
#include "rf_evenodd_dags.h"
|
||||
#include "rf_engine.h"
|
||||
|
||||
typedef struct RF_EvenOddConfigInfo_s {
|
||||
RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by IdentifyStripe */
|
||||
} RF_EvenOddConfigInfo_t;
|
||||
|
||||
int rf_ConfigureEvenOdd(listp, raidPtr, cfgPtr)
|
||||
RF_ShutdownList_t **listp;
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_Config_t *cfgPtr;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_EvenOddConfigInfo_t *info;
|
||||
RF_RowCol_t i, j, startdisk;
|
||||
|
||||
RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList);
|
||||
layoutPtr->layoutSpecificInfo = (void *) info;
|
||||
|
||||
RF_ASSERT(raidPtr->numRow == 1);
|
||||
|
||||
info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
|
||||
startdisk = 0;
|
||||
for (i=0; i<raidPtr->numCol; i++) {
|
||||
for (j=0; j<raidPtr->numCol; j++) {
|
||||
info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
|
||||
}
|
||||
if ((startdisk -= 2) < 0) startdisk += raidPtr->numCol;
|
||||
}
|
||||
|
||||
/* fill in the remaining layout parameters */
|
||||
layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
|
||||
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
|
||||
layoutPtr->numDataCol = raidPtr->numCol-2; /* ORIG: layoutPtr->numDataCol = raidPtr->numCol-1; */
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
if (raidPtr->numCol <= 17){
|
||||
printf("Number of stripe units in a parity stripe is smaller than 17. Please\n");
|
||||
printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
|
||||
printf("be 17 to increase performance. \n");
|
||||
return(EINVAL);
|
||||
}
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
if (raidPtr->numCol > 17) {
|
||||
printf("Number of stripe units in a parity stripe is bigger than 17. Please\n");
|
||||
printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
|
||||
printf("be 257 for encoding and decoding functions to work. \n");
|
||||
return(EINVAL);
|
||||
}
|
||||
#endif
|
||||
layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->numParityCol = 2;
|
||||
layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
|
||||
raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * Default floating recon buffer count for EVENODD.  The value is a fixed
 * constant; raidPtr is not consulted.
 */
int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr)
{
  const int defaultCount = 20;

  return defaultCount;
}
|
||||
|
||||
/*
 * Default head separation limit for EVENODD.  The value is a fixed
 * constant; raidPtr is not consulted.
 */
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr)
{
  const RF_HeadSepLimit_t defaultLimit = 10;

  return defaultLimit;
}
|
||||
|
||||
void rf_IdentifyStripeEvenOdd(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids,
|
||||
RF_RowCol_t *outRow)
|
||||
{
|
||||
RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
|
||||
RF_EvenOddConfigInfo_t *info = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
|
||||
*outRow = 0;
|
||||
*diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ];
|
||||
}
|
||||
|
||||
/* The layout of stripe unit on the disks are: c0 c1 c2 c3 c4
|
||||
|
||||
0 1 2 E P
|
||||
5 E P 3 4
|
||||
P 6 7 8 E
|
||||
10 11 E P 9
|
||||
E P 12 13 14
|
||||
....
|
||||
|
||||
We use the MapSectorRAID5 to map data information because the routine can be shown to map exactly
|
||||
the layout of data stripe unit as shown above although we have 2 redundant information now.
|
||||
But for E and P, we use rf_MapEEvenOdd and rf_MapParityEvenOdd which are different method from raid-5.
|
||||
*/
|
||||
|
||||
|
||||
void rf_MapParityEvenOdd(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
RF_StripeNum_t endSUIDofthisStrip = (SUID/raidPtr->Layout.numDataCol + 1)*raidPtr->Layout.numDataCol - 1;
|
||||
|
||||
*row = 0;
|
||||
*col = ( endSUIDofthisStrip + 2)%raidPtr->numCol;
|
||||
*diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
|
||||
(raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
|
||||
void rf_MapEEvenOdd(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
RF_StripeNum_t endSUIDofthisStrip = (SUID/raidPtr->Layout.numDataCol + 1)*raidPtr->Layout.numDataCol - 1;
|
||||
|
||||
*row = 0;
|
||||
*col = ( endSUIDofthisStrip + 1)%raidPtr->numCol;
|
||||
*diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
|
||||
(raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
|
||||
void rf_EODagSelect(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_VoidFuncPtr *createFunc)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
unsigned ndfail = asmap->numDataFailed;
|
||||
unsigned npfail = asmap->numParityFailed +asmap->numQFailed;
|
||||
unsigned ntfail = npfail + ndfail;
|
||||
|
||||
RF_ASSERT(RF_IO_IS_R_OR_W(type));
|
||||
if (ntfail > 2)
|
||||
{
|
||||
RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
|
||||
/* *infoFunc = */ *createFunc = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
/* ok, we can do this I/O */
|
||||
if (type == RF_IO_TYPE_READ)
|
||||
{
|
||||
switch (ndfail)
|
||||
{
|
||||
case 0:
|
||||
/* fault free read */
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG; /* same as raid 5 */
|
||||
break;
|
||||
case 1:
|
||||
/* lost a single data unit */
|
||||
/* two cases:
|
||||
(1) parity is not lost.
|
||||
do a normal raid 5 reconstruct read.
|
||||
(2) parity is lost.
|
||||
do a reconstruct read using "e".
|
||||
*/
|
||||
if (ntfail == 2) /* also lost redundancy */
|
||||
{
|
||||
if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateReadDAG;
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateReadDAG;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* P and E are ok. But is there a failure
|
||||
in some unaccessed data unit?
|
||||
*/
|
||||
if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateReadDAG;
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateReadDAG;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
/* *createFunc = rf_EO_200_CreateReadDAG; */
|
||||
*createFunc = NULL;
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* a write */
|
||||
switch (ntfail)
|
||||
{
|
||||
case 0: /* fault free */
|
||||
if (rf_suppressLocksAndLargeWrites ||
|
||||
(((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
|
||||
(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
|
||||
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EOCreateSmallWriteDAG;
|
||||
}
|
||||
else {
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EOCreateLargeWriteDAG;
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* single disk fault */
|
||||
if (npfail==1)
|
||||
{
|
||||
RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
|
||||
if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)
|
||||
{ /* q died, treat like normal mode raid5 write.*/
|
||||
if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
|
||||
|| (asmap->parityInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateSmallWriteDAG;
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateLargeWriteDAG;
|
||||
}
|
||||
else
|
||||
{ /* parity died, small write only updating Q */
|
||||
if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
|
||||
|| (asmap->qInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_010_CreateSmallWriteDAG;
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_010_CreateLargeWriteDAG;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* data missing.
|
||||
Do a P reconstruct write if only a single data unit
|
||||
is lost in the stripe, otherwise a reconstruct
|
||||
write which employnig both P and E units. */
|
||||
if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
|
||||
{
|
||||
if (asmap->numStripeUnitsAccessed == 1)
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateWriteDAG;
|
||||
else
|
||||
*createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */
|
||||
}
|
||||
else
|
||||
{
|
||||
if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
|
||||
*createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */
|
||||
else *createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateWriteDAG;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: /* two disk faults */
|
||||
switch (npfail)
|
||||
{
|
||||
case 2: /* both p and q dead */
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_011_CreateWriteDAG;
|
||||
break;
|
||||
case 1: /* either p or q and dead data */
|
||||
RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
|
||||
RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
|
||||
if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
|
||||
{
|
||||
if(asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
|
||||
*createFunc = NULL; /* In both PQ and EvenOdd, no direct support for this case now, like that in Raid-5 */
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateWriteDAG;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
|
||||
*createFunc = NULL; /* No direct support for this case, like that in Raid-5 */
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateWriteDAG;
|
||||
}
|
||||
break;
|
||||
case 0: /* double data loss */
|
||||
/* if(asmap->failedPDAs[0]->numSector + asmap->failedPDAs[1]->numSector == 2 * layoutPtr->sectorsPerStripeUnit )
|
||||
*createFunc = rf_EOCreateLargeWriteDAG;
|
||||
else */
|
||||
*createFunc = NULL; /* currently, in Evenodd, No support for simultaneous access of both failed SUs */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default: /* more than 2 disk faults */
|
||||
*createFunc = NULL;
|
||||
RF_PANIC();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
int rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RaidAddr_t raidAddr;
|
||||
RF_PhysDiskAddr_t *parityPDA;
|
||||
int correct_it;
|
||||
RF_RaidAccessFlags_t flags;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
|
||||
RF_SectorCount_t numsector = parityPDA->numSector;
|
||||
int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
|
||||
int bytesPerStripe = numbytes * layoutPtr->numDataCol;
|
||||
RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */
|
||||
RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
|
||||
RF_AccessStripeMapHeader_t *asm_h;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
RF_AllocListElem_t *alloclist;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
char *pbuf, *buf, *end_p, *p;
|
||||
char *redundantbuf2;
|
||||
int redundantTwoErr = 0, redundantOneErr = 0;
|
||||
int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, parity_corrected = RF_FALSE, red2_corrected = RF_FALSE;
|
||||
int i, retcode;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
|
||||
int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
|
||||
RF_AccTraceEntry_t tracerec;
|
||||
RF_MCPair_t *mcpair;
|
||||
|
||||
retcode = RF_PARITY_OKAY;
|
||||
|
||||
mcpair = rf_AllocMCPair();
|
||||
rf_MakeAllocList(alloclist);
|
||||
RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
|
||||
RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make sure buffer is zeroed */
|
||||
end_p = buf + bytesPerStripe;
|
||||
RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist); /* use calloc to make sure buffer is zeroed */
|
||||
|
||||
rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
|
||||
"Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
|
||||
blockNode = rd_dag_h->succedents[0];
|
||||
unblockNode = blockNode->succedents[0]->succedents[0];
|
||||
|
||||
/* map the stripe and fill in the PDAs in the dag */
|
||||
asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
|
||||
asmap = asm_h->stripeMap;
|
||||
|
||||
for (pda=asmap->physInfo,i=0; i<layoutPtr->numDataCol; i++,pda=pda->next) {
|
||||
RF_ASSERT(pda);
|
||||
rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
|
||||
RF_ASSERT(pda->numSector != 0);
|
||||
if (rf_TryToRedirectPDA(raidPtr, pda, 0)) goto out; /* no way to verify parity if disk is dead. return w/ good status */
|
||||
blockNode->succedents[i]->params[0].p = pda;
|
||||
blockNode->succedents[i]->params[2].v = psID;
|
||||
blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
}
|
||||
|
||||
RF_ASSERT(!asmap->parityInfo->next);
|
||||
rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
|
||||
RF_ASSERT(asmap->parityInfo->numSector != 0);
|
||||
if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
|
||||
goto out;
|
||||
blockNode->succedents[ layoutPtr->numDataCol ]->params[0].p = asmap->parityInfo;
|
||||
|
||||
RF_ASSERT(!asmap->qInfo->next);
|
||||
rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1);
|
||||
RF_ASSERT(asmap->qInfo->numSector != 0);
|
||||
if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1)) goto out;
|
||||
/*
|
||||
* if disk is dead, b/c no reconstruction is implemented right now,
|
||||
* the function "rf_TryToRedirectPDA" always return one, which cause
|
||||
* go to out and return w/ good status
|
||||
*/
|
||||
blockNode->succedents[ layoutPtr->numDataCol +1 ]->params[0].p = asmap->qInfo;
|
||||
|
||||
/* fire off the DAG */
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
rd_dag_h->tracerec = &tracerec;
|
||||
|
||||
if (rf_verifyParityDebug) {
|
||||
printf("Parity verify read dag:\n");
|
||||
rf_PrintDAGList(rd_dag_h);
|
||||
}
|
||||
|
||||
RF_LOCK_MUTEX(mcpair->mutex);
|
||||
mcpair->flag = 0;
|
||||
rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
|
||||
(void *) mcpair);
|
||||
while (!mcpair->flag) RF_WAIT_COND(mcpair->cond, mcpair->mutex);
|
||||
RF_UNLOCK_MUTEX(mcpair->mutex);
|
||||
if (rd_dag_h->status != rf_enable) {
|
||||
RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
|
||||
retcode = RF_PARITY_COULD_NOT_VERIFY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (p=buf, i=0; p<end_p; p+=numbytes, i++) {
|
||||
rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector);
|
||||
/*
|
||||
* the corresponding columes in EvenOdd encoding Matrix for these p pointers which point
|
||||
* to the databuffer in a full stripe are sequentially from 0 to layoutPtr->numDataCol-1
|
||||
*/
|
||||
rf_bxor(p, pbuf, numbytes, NULL);
|
||||
}
|
||||
RF_ASSERT(i==layoutPtr->numDataCol);
|
||||
|
||||
for (i=0; i<numbytes; i++) {
|
||||
if (pbuf[i] != buf[bytesPerStripe+i]) {
|
||||
if (!correct_it) {
|
||||
RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
|
||||
i,(u_char) buf[bytesPerStripe+i],(u_char) pbuf[i]);
|
||||
}
|
||||
}
|
||||
redundantOneErr = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
for (i=0; i<numbytes; i++) {
|
||||
if (redundantbuf2[i] != buf[bytesPerStripe+numbytes+i]) {
|
||||
if (!correct_it) {
|
||||
RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n",
|
||||
i,(u_char) buf[bytesPerStripe+numbytes+i],(u_char) redundantbuf2[i]);
|
||||
}
|
||||
redundantTwoErr = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (redundantOneErr || redundantTwoErr )
|
||||
retcode = RF_PARITY_BAD;
|
||||
|
||||
/* correct the first redundant disk, ie parity if it is error */
|
||||
if (redundantOneErr && correct_it) {
|
||||
wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
"Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
|
||||
wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0];
|
||||
wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
|
||||
wrBlock->succedents[0]->params[2].v = psID;
|
||||
wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
wr_dag_h->tracerec = &tracerec;
|
||||
if (rf_verifyParityDebug) {
|
||||
printf("Parity verify write dag:\n");
|
||||
rf_PrintDAGList(wr_dag_h);
|
||||
}
|
||||
RF_LOCK_MUTEX(mcpair->mutex);
|
||||
mcpair->flag = 0;
|
||||
rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
|
||||
(void *) mcpair);
|
||||
while (!mcpair->flag)
|
||||
RF_WAIT_COND(mcpair->cond, mcpair->mutex);
|
||||
RF_UNLOCK_MUTEX(mcpair->mutex);
|
||||
if (wr_dag_h->status != rf_enable) {
|
||||
RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
|
||||
parity_cant_correct = RF_TRUE;
|
||||
} else {
|
||||
parity_corrected = RF_TRUE;
|
||||
}
|
||||
rf_FreeDAG(wr_dag_h);
|
||||
}
|
||||
|
||||
if (redundantTwoErr && correct_it) {
|
||||
wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
"Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY);
|
||||
wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0];
|
||||
wrBlock->succedents[0]->params[0].p = asmap->qInfo;
|
||||
wrBlock->succedents[0]->params[2].v = psID;
|
||||
wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
wr_dag_h->tracerec = &tracerec;
|
||||
if (rf_verifyParityDebug) {
|
||||
printf("Dag of write new second redundant information in parity verify :\n");
|
||||
rf_PrintDAGList(wr_dag_h);
|
||||
}
|
||||
RF_LOCK_MUTEX(mcpair->mutex);
|
||||
mcpair->flag = 0;
|
||||
rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
|
||||
(void *) mcpair);
|
||||
while (!mcpair->flag)
|
||||
RF_WAIT_COND(mcpair->cond, mcpair->mutex);
|
||||
RF_UNLOCK_MUTEX(mcpair->mutex);
|
||||
if (wr_dag_h->status != rf_enable) {
|
||||
RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n");
|
||||
red2_cant_correct = RF_TRUE;
|
||||
} else {
|
||||
red2_corrected = RF_TRUE;
|
||||
}
|
||||
rf_FreeDAG(wr_dag_h);
|
||||
}
|
||||
if ( (redundantOneErr && parity_cant_correct) ||
|
||||
(redundantTwoErr && red2_cant_correct ))
|
||||
retcode = RF_PARITY_COULD_NOT_CORRECT;
|
||||
if ( (retcode = RF_PARITY_BAD) && parity_corrected && red2_corrected )
|
||||
retcode = RF_PARITY_CORRECTED;
|
||||
|
||||
|
||||
out:
|
||||
rf_FreeAccessStripeMap(asm_h);
|
||||
rf_FreeAllocList(alloclist);
|
||||
rf_FreeDAG(rd_dag_h);
|
||||
rf_FreeMCPair(mcpair);
|
||||
return(retcode);
|
||||
}
|
||||
|
||||
#endif /* RF_INCLUDE_EVENODD > 0 */
|
|
@ -0,0 +1,48 @@
|
|||
/* $NetBSD: rf_evenodd.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995, 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Chang-Ming Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_EVENODD_H_
|
||||
#define _RF__RF_EVENODD_H_
|
||||
|
||||
/* extern declarations of the failure mode functions. */
|
||||
int rf_ConfigureEvenOdd(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr);
|
||||
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr);
|
||||
void rf_IdentifyStripeEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outrow);
|
||||
void rf_MapParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapEEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_EODagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
|
||||
int rf_VerifyParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
|
||||
RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
|
||||
|
||||
#endif /* !_RF__RF_EVENODD_H_ */
|
|
@ -0,0 +1,886 @@
|
|||
/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: ChangMing Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Code for RAID-EVENODD architecture.
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagffrd.h"
|
||||
#include "rf_dagffwr.h"
|
||||
#include "rf_dagdegrd.h"
|
||||
#include "rf_dagdegwr.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_parityscan.h"
|
||||
#include "rf_sys.h"
|
||||
#include "rf_evenodd.h"
|
||||
#include "rf_evenodd_dagfuncs.h"
|
||||
|
||||
/* These redundant functions are for small write */
|
||||
RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" };
|
||||
RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E" };
|
||||
|
||||
/* These redundant functions are for degraded read */
|
||||
RF_RedFuncs_t rf_eoPRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
|
||||
RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" };
|
||||
|
||||
/**********************************************************************************************
|
||||
* the following encoding node functions is used in EO_000_CreateLargeWriteDAG
|
||||
**********************************************************************************************/
|
||||
int rf_RegularPEFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
rf_RegularESubroutine(node,node->results[1]);
|
||||
rf_RegularXorFunc(node); /* does the wakeup here! */
|
||||
#if 1
|
||||
return(0); /* XXX This was missing... GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************************************
|
||||
* For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
|
||||
* be used. The previous case is when write access at least sectors of full stripe unit.
|
||||
* The later function is used when the write access two stripe units but with total sectors
|
||||
* less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
|
||||
* areas in their stripe unit and parity write and 'E' write are both divided into two distinct
|
||||
* writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
|
||||
************************************************************************************************/
|
||||
|
||||
/* Algorithm:
|
||||
1. Store the difference of old data and new data in the Rod buffer.
|
||||
2. then encode this buffer into the buffer which already have old 'E' information inside it,
|
||||
the result can be shown to be the new 'E' information.
|
||||
3. xor the Wnd buffer into the difference buffer to recover the original old data.
|
||||
Here we have another alternative: to allocate a temporary buffer for storing the difference of
|
||||
old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
|
||||
take the same speed as the previous, and need more memory.
|
||||
*/
|
||||
int rf_RegularONEFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
||||
int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */
|
||||
int i, k, retcode = 0;
|
||||
int suoffset, length;
|
||||
RF_RowCol_t scol;
|
||||
char *srcbuf, *destbuf;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
RF_Etimer_t timer;
|
||||
RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
|
||||
int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */
|
||||
|
||||
RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q );
|
||||
RF_ASSERT(ESUOffset == 0);
|
||||
|
||||
RF_ETIMER_START(timer);
|
||||
|
||||
/* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */
|
||||
for( k=0; k< EpdaIndex; k += 2) {
|
||||
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
|
||||
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
|
||||
}
|
||||
/* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer */
|
||||
for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */
|
||||
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
||||
srcbuf = (char *) node->params[i+1].p;
|
||||
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
||||
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset);
|
||||
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
||||
}
|
||||
/* Recover the original old data to be used by parity encoding function in XorNode */
|
||||
for( k=0; k< EpdaIndex; k += 2) {
|
||||
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
|
||||
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
|
||||
}
|
||||
RF_ETIMER_STOP(timer);
|
||||
RF_ETIMER_EVAL(timer);
|
||||
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
||||
rf_GenericWakeupFunc(node, 0);
|
||||
#if 1
|
||||
return(0); /* XXX this was missing.. GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
int rf_SimpleONEFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
||||
RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
|
||||
int retcode = 0;
|
||||
char *srcbuf, *destbuf;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
int length;
|
||||
RF_RowCol_t scol;
|
||||
RF_Etimer_t timer;
|
||||
|
||||
RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q );
|
||||
if (node->dagHdr->status == rf_enable) {
|
||||
RF_ETIMER_START(timer);
|
||||
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[4].p)->numSector );/* this is a pda of writeDataNodes */
|
||||
/* bxor to buffer of readDataNodes */
|
||||
retcode = rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
|
||||
/* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
|
||||
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
srcbuf = node->params[1].p;
|
||||
destbuf = node->params[3].p;
|
||||
/* Start encoding process */
|
||||
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
||||
rf_bxor( node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
|
||||
RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
||||
|
||||
}
|
||||
return(rf_GenericWakeupFunc(node, retcode)); /* call wake func explicitly since no I/O in this node */
|
||||
}
|
||||
|
||||
|
||||
/****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/
|
||||
void rf_RegularESubroutine(node, ebuf)
|
||||
RF_DagNode_t *node;
|
||||
char *ebuf;
|
||||
{
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
int i, suoffset;
|
||||
RF_RowCol_t scol;
|
||||
char *srcbuf, *destbuf;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
RF_Etimer_t timer;
|
||||
|
||||
RF_ETIMER_START(timer);
|
||||
for (i=0; i<node->numParams-2; i+=2) {
|
||||
RF_ASSERT( node->params[i+1].p != ebuf );
|
||||
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
||||
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
||||
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
srcbuf = (char *) node->params[i+1].p;
|
||||
destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset );
|
||||
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
||||
}
|
||||
RF_ETIMER_STOP(timer);
|
||||
RF_ETIMER_EVAL(timer);
|
||||
tracerec->xor_us += RF_ETIMER_VAL_US(timer);
|
||||
}
|
||||
|
||||
|
||||
/*******************************************************************************************
|
||||
* Used in EO_001_CreateLargeWriteDAG
|
||||
******************************************************************************************/
|
||||
int rf_RegularEFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
rf_RegularESubroutine(node, node->results[0]);
|
||||
rf_GenericWakeupFunc(node, 0);
|
||||
#if 1
|
||||
return(0); /* XXX this was missing?.. GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
/*******************************************************************************************
|
||||
* This degraded function allow only two case:
|
||||
* 1. when write access the full failed stripe unit, then the access can be more than
|
||||
* one stripe unit.
|
||||
* 2. when write access only part of the failed SU, we assume accesses of more than
|
||||
* one stripe unit is not allowed so that the write can be dealt with like a
|
||||
* large write.
|
||||
* The following function is based on these assumptions. So except in the second case,
|
||||
* it looks the same as a large write encoding function. But this is not exactly the
|
||||
* normal way for doing a degraded write, since raidframe have to break cases of access
|
||||
* other than the above two into smaller accesses. We may have to change
|
||||
* rf_DegrESubroutine in the future.
|
||||
*******************************************************************************************/
|
||||
void rf_DegrESubroutine(node, ebuf)
|
||||
RF_DagNode_t *node;
|
||||
char *ebuf;
|
||||
{
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
||||
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
|
||||
RF_RowCol_t scol;
|
||||
char *srcbuf, *destbuf;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
RF_Etimer_t timer;
|
||||
|
||||
RF_ETIMER_START(timer);
|
||||
for (i=0; i<node->numParams-2; i+=2) {
|
||||
RF_ASSERT( node->params[i+1].p != ebuf );
|
||||
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
||||
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
||||
scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
srcbuf = (char *) node->params[i+1].p;
|
||||
destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset);
|
||||
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
|
||||
}
|
||||
|
||||
RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* This function is used in case where one data disk failed and both redundant disks
|
||||
* alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
|
||||
* failed in the stripe but not accessed at this time, then we should, instead, use
|
||||
* the rf_EOWriteDoubleRecoveryFunc().
|
||||
**************************************************************************************/
|
||||
int rf_Degraded_100_EOFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
rf_DegrESubroutine(node, node->results[1]);
|
||||
rf_RecoveryXorFunc(node); /* does the wakeup here! */
|
||||
#if 1
|
||||
return(0); /* XXX this was missing... SHould these be void functions??? GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
/**************************************************************************************
|
||||
* This function is to encode one sector in one of the data disks to the E disk.
|
||||
* However, in evenodd this function can also be used as decoding function to recover
|
||||
* data from dead disk in the case of parity failure and a single data failure.
|
||||
**************************************************************************************/
|
||||
void rf_e_EncOneSect(
|
||||
RF_RowCol_t srcLogicCol,
|
||||
char *srcSecbuf,
|
||||
RF_RowCol_t destLogicCol,
|
||||
char *destSecbuf,
|
||||
int bytesPerSector)
|
||||
{
|
||||
int S_index; /* index of the EU in the src col which need be Xored into all EUs in a dest sector */
|
||||
int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
|
||||
RF_RowCol_t j, indexInDest, /* row index of an encoding unit in the destination colume of encoding matrix */
|
||||
indexInSrc; /* row index of an encoding unit in the source colume used for recovery */
|
||||
int bytesPerEU = bytesPerSector/numRowInEncMatix;
|
||||
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
int shortsPerEU = bytesPerEU/sizeof(short);
|
||||
short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
|
||||
register short temp1;
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
int longsPerEU = bytesPerEU/sizeof(long);
|
||||
long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
|
||||
register long temp1;
|
||||
#endif
|
||||
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 );
|
||||
RF_ASSERT( bytesPerEU % sizeof(short) == 0 );
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 );
|
||||
RF_ASSERT( bytesPerEU % sizeof(long) == 0);
|
||||
#endif
|
||||
|
||||
S_index = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
srcShortBuf1 = (short *)(srcSecbuf + S_index * bytesPerEU);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
srcLongBuf1 = (long *)(srcSecbuf + S_index * bytesPerEU);
|
||||
#endif
|
||||
|
||||
for( indexInDest = 0; indexInDest < numRowInEncMatix ; indexInDest++){
|
||||
indexInSrc = rf_EO_Mod( (indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM );
|
||||
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
destShortBuf = (short *)(destSecbuf + indexInDest * bytesPerEU);
|
||||
srcShortBuf2 = (short *)(srcSecbuf + indexInSrc * bytesPerEU);
|
||||
for(j=0; j < shortsPerEU; j++) {
|
||||
temp1 = destShortBuf[j]^srcShortBuf1[j];
|
||||
/* note: S_index won't be at the end row for any src col! */
|
||||
if(indexInSrc != RF_EO_MATRIX_DIM -1) destShortBuf[j] = (srcShortBuf2[j])^temp1;
|
||||
/* if indexInSrc is at the end row, ie. RF_EO_MATRIX_DIM -1, then all elements are zero! */
|
||||
else destShortBuf[j] = temp1;
|
||||
}
|
||||
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
destLongBuf = (long *)(destSecbuf + indexInDest * bytesPerEU);
|
||||
srcLongBuf2 = (long *)(srcSecbuf + indexInSrc * bytesPerEU);
|
||||
for(j=0; j < longsPerEU; j++) {
|
||||
temp1 = destLongBuf[j]^srcLongBuf1[j];
|
||||
if(indexInSrc != RF_EO_MATRIX_DIM -1) destLongBuf[j] = (srcLongBuf2[j])^temp1;
|
||||
else destLongBuf[j] = temp1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rf_e_encToBuf(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RowCol_t srcLogicCol,
|
||||
char *srcbuf,
|
||||
RF_RowCol_t destLogicCol,
|
||||
char *destbuf,
|
||||
int numSector)
|
||||
{
|
||||
int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
|
||||
|
||||
for (i=0; i < numSector; i++)
|
||||
{
|
||||
rf_e_EncOneSect( srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
|
||||
srcbuf += bytesPerSector;
|
||||
destbuf += bytesPerSector;
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************************************************************
|
||||
* when parity die and one data die, We use second redundant information, 'E',
|
||||
* to recover the data in dead disk. This function is used in the recovery node of
|
||||
* for EO_110_CreateReadDAG
|
||||
**************************************************************************************/
|
||||
int rf_RecoveryEFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
|
||||
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
|
||||
RF_RowCol_t scol, /*source logical column*/
|
||||
fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */
|
||||
int i;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
|
||||
char *srcbuf, *destbuf;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
RF_Etimer_t timer;
|
||||
|
||||
bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector));
|
||||
if (node->dagHdr->status == rf_enable) {
|
||||
RF_ETIMER_START(timer);
|
||||
for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) {
|
||||
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
|
||||
if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */
|
||||
else scol = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
srcbuf = (char *) node->params[i+1].p;
|
||||
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
||||
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
|
||||
rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
|
||||
}
|
||||
RF_ETIMER_STOP(timer);
|
||||
RF_ETIMER_EVAL(timer);
|
||||
tracerec->xor_us += RF_ETIMER_VAL_US(timer);
|
||||
}
|
||||
return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
|
||||
}
|
||||
|
||||
/**************************************************************************************
|
||||
* This function is used in the case where one data and the parity have filed.
|
||||
* (in EO_110_CreateWriteDAG )
|
||||
**************************************************************************************/
|
||||
int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
|
||||
{
|
||||
rf_DegrESubroutine(node, node->results[0]);
|
||||
rf_GenericWakeupFunc(node, 0);
|
||||
#if 1
|
||||
return(0); /* XXX Yet another one!! GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
|
||||
**************************************************************************************/
|
||||
|
||||
void rf_doubleEOdecode(
|
||||
RF_Raid_t *raidPtr,
|
||||
char **rrdbuf,
|
||||
char **dest,
|
||||
RF_RowCol_t *fcol,
|
||||
char *pbuf,
|
||||
char *ebuf)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
||||
int i, j, k, f1, f2, row;
|
||||
int rrdrow, erow, count = 0;
|
||||
int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
||||
int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
|
||||
#if 0
|
||||
int pcol = (RF_EO_MATRIX_DIM) - 1;
|
||||
#endif
|
||||
int ecol = (RF_EO_MATRIX_DIM) - 2;
|
||||
int bytesPerEU = bytesPerSector/numRowInEncMatix;
|
||||
int numDataCol = layoutPtr->numDataCol;
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
int shortsPerEU = bytesPerEU/sizeof(short);
|
||||
short *rrdbuf_current, *pbuf_current, *ebuf_current;
|
||||
short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
|
||||
register short *temp;
|
||||
short *P;
|
||||
|
||||
RF_ASSERT( bytesPerEU % sizeof(short) == 0);
|
||||
RF_Malloc(P, bytesPerEU, (short *));
|
||||
RF_Malloc(temp, bytesPerEU, (short *));
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
int longsPerEU = bytesPerEU/sizeof(long);
|
||||
long *rrdbuf_current, *pbuf_current, *ebuf_current;
|
||||
long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
|
||||
register long *temp;
|
||||
long *P;
|
||||
|
||||
RF_ASSERT( bytesPerEU % sizeof(long) == 0);
|
||||
RF_Malloc(P, bytesPerEU, (long *));
|
||||
RF_Malloc(temp, bytesPerEU, (long *));
|
||||
#endif
|
||||
RF_ASSERT( *((long *)dest[0]) == 0);
|
||||
RF_ASSERT( *((long *)dest[1]) == 0);
|
||||
bzero((char *)P, bytesPerEU);
|
||||
bzero((char *)temp, bytesPerEU);
|
||||
RF_ASSERT( *P == 0 );
|
||||
/* calculate the 'P' parameter, which, not parity, is the Xor of all elements in
|
||||
the last two column, ie. 'E' and 'parity' colume, see the Ref. paper by Blaum, et al 1993 */
|
||||
for( i=0; i< numRowInEncMatix; i++)
|
||||
for( k=0; k< longsPerEU; k++) {
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
ebuf_current = ((short *)ebuf) + i*shortsPerEU + k;
|
||||
pbuf_current = ((short *)pbuf) + i*shortsPerEU + k;
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
ebuf_current = ((long *)ebuf) + i*longsPerEU + k;
|
||||
pbuf_current = ((long *)pbuf) + i*longsPerEU + k;
|
||||
#endif
|
||||
P[k] ^= *ebuf_current;
|
||||
P[k] ^= *pbuf_current;
|
||||
}
|
||||
RF_ASSERT( fcol[0] != fcol[1] );
|
||||
if( fcol[0] < fcol[1] ) {
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
dest_smaller = (short *)(dest[0]);
|
||||
dest_larger = (short *)(dest[1]);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
dest_smaller = (long *)(dest[0]);
|
||||
dest_larger = (long *)(dest[1]);
|
||||
#endif
|
||||
f1 = fcol[0];
|
||||
f2 = fcol[1];
|
||||
}
|
||||
else {
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
dest_smaller = (short *)(dest[1]);
|
||||
dest_larger = (short *)(dest[0]);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
dest_smaller = (long *)(dest[1]);
|
||||
dest_larger = (long *)(dest[0]);
|
||||
#endif
|
||||
f1 = fcol[1];
|
||||
f2 = fcol[0];
|
||||
}
|
||||
row = (RF_EO_MATRIX_DIM) -1;
|
||||
while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) )
|
||||
{
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
dest_larger_current = dest_larger + row*shortsPerEU;
|
||||
dest_smaller_current = dest_smaller + row*shortsPerEU;
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
dest_larger_current = dest_larger + row*longsPerEU;
|
||||
dest_smaller_current = dest_smaller + row*longsPerEU;
|
||||
#endif
|
||||
/** Do the diagonal recovery. Initially, temp[k] = (failed 1),
|
||||
which is the failed data in the colume which has smaller col index. **/
|
||||
/* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
|
||||
for( j=0; j< numDataCol; j++)
|
||||
{
|
||||
if( j == f1 || j == f2 ) continue;
|
||||
rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM );
|
||||
if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) {
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU;
|
||||
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU;
|
||||
for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
/* step 2: ^E(erow,m-2), If erow is at the buttom row, don't Xor into it
|
||||
E(erow,m-2) = (principle diagonal) ^ (failed 1) ^ (failed 2)
|
||||
^ ( SUM of nonfailed in-diagonal A(rrdrow,0..m-3) )
|
||||
After this step, temp[k] = (principle diagonal) ^ (failed 2) */
|
||||
|
||||
erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) );
|
||||
if ( erow != (RF_EO_MATRIX_DIM) -1) {
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
ebuf_current = (short *)ebuf + shortsPerEU * erow;
|
||||
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
ebuf_current = (long *)ebuf + longsPerEU * erow;
|
||||
for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k);
|
||||
#endif
|
||||
}
|
||||
/* step 3: ^P to obtain the failed data (failed 2).
|
||||
P can be proved to be actually (principle diagonal)
|
||||
After this step, temp[k] = (failed 2), the failed data to be recovered */
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k];
|
||||
/* Put the data to the destination buffer */
|
||||
for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k];
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
for (k=0; k< longsPerEU; k++) temp[k] ^= P[k];
|
||||
/* Put the data to the destination buffer */
|
||||
for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k];
|
||||
#endif
|
||||
|
||||
/** THE FOLLOWING DO THE HORIZONTAL XOR **/
|
||||
/* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data columes */
|
||||
for (j=0; j< numDataCol; j++)
|
||||
{
|
||||
if( j == f1 || j == f2 ) continue;
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU;
|
||||
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU;
|
||||
for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
|
||||
#endif
|
||||
}
|
||||
/* step 2: ^A(row,m-1) */
|
||||
/* step 3: Put the data to the destination buffer */
|
||||
#if RF_EO_MATRIX_DIM > 17
|
||||
pbuf_current = (short *)pbuf + shortsPerEU * row;
|
||||
for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k);
|
||||
for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k];
|
||||
#elif RF_EO_MATRIX_DIM == 17
|
||||
pbuf_current = (long *)pbuf + longsPerEU * row;
|
||||
for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k);
|
||||
for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k];
|
||||
#endif
|
||||
count++;
|
||||
}
|
||||
/* Check if all Encoding Unit in the data buffer have been decoded,
|
||||
according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
|
||||
this algorithm will covered all buffer */
|
||||
RF_ASSERT( count == numRowInEncMatix );
|
||||
RF_Free((char *)P, bytesPerEU);
|
||||
RF_Free((char *)temp, bytesPerEU);
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************************
|
||||
* This function is called by double degragded read
|
||||
* EO_200_CreateReadDAG
|
||||
*
|
||||
***************************************************************************************/
|
||||
int rf_EvenOddDoubleRecoveryFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
int ndataParam = 0;
|
||||
int np = node->numParams;
|
||||
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
||||
int i, prm, sector, nresults = node->numResults;
|
||||
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
|
||||
unsigned sosAddr;
|
||||
int two = 0, mallc_one= 0, mallc_two = 0; /* flags to indicate if memory is allocated */
|
||||
int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
||||
RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda;
|
||||
RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
|
||||
char **buf, *ebuf, *pbuf, *dest[2];
|
||||
long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff;
|
||||
RF_SectorNum_t startSector, endSector;
|
||||
RF_Etimer_t timer;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
|
||||
RF_ETIMER_START(timer);
|
||||
|
||||
/* Find out the number of parameters which are pdas for data information */
|
||||
for (i = 0; i<= np; i++)
|
||||
if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; }
|
||||
|
||||
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
|
||||
if (ndataParam != 0 ){
|
||||
RF_Malloc(suoff, ndataParam*sizeof(long), (long *) );
|
||||
RF_Malloc(suend, ndataParam*sizeof(long), (long *) );
|
||||
RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) );
|
||||
}
|
||||
|
||||
if (asmap->failedPDAs[1] &&
|
||||
(asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
|
||||
RF_ASSERT(0); /* currently, no support for this situation */
|
||||
ppda = node->params[np-6].p;
|
||||
ppda2 = node->params[np-5].p;
|
||||
RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY );
|
||||
epda = node->params[np-4].p;
|
||||
epda2 = node->params[np-3].p;
|
||||
RF_ASSERT( epda2->type == RF_PDA_TYPE_Q );
|
||||
two = 1;
|
||||
}
|
||||
else {
|
||||
ppda = node->params[np-4].p;
|
||||
epda = node->params[np-3].p;
|
||||
psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
|
||||
esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
|
||||
RF_ASSERT( psuoff == esuoff );
|
||||
}
|
||||
/*
|
||||
the followings have three goals:
|
||||
1. determine the startSector to begin decoding and endSector to end decoding.
|
||||
2. determine the colume numbers of the two failed disks.
|
||||
3. determine the offset and end offset of the access within each failed stripe unit.
|
||||
*/
|
||||
if( nresults == 1 ) {
|
||||
/* find the startSector to begin decoding */
|
||||
pda = node->results[0];
|
||||
bzero(pda->bufPtr, bytesPerSector*pda->numSector );
|
||||
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector );
|
||||
fsuend[0] = fsuoff[0] + pda->numSector;
|
||||
startSector = fsuoff[0];
|
||||
endSector = fsuend[0];
|
||||
|
||||
/* find out the the column of failed disk being accessed */
|
||||
fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
|
||||
/* find out the other failed colume not accessed */
|
||||
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
||||
for (i=0; i < numDataCol; i++) {
|
||||
npda.raidAddress = sosAddr + (i * secPerSU);
|
||||
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
|
||||
/* skip over dead disks */
|
||||
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
|
||||
if (i != fcol[0]) break;
|
||||
}
|
||||
RF_ASSERT (i < numDataCol);
|
||||
fcol[1] = i;
|
||||
}
|
||||
else {
|
||||
RF_ASSERT ( nresults == 2 );
|
||||
pda0 = node->results[0]; bzero(pda0->bufPtr, bytesPerSector*pda0->numSector );
|
||||
pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector*pda1->numSector );
|
||||
/* determine the failed colume numbers of the two failed disks. */
|
||||
fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress );
|
||||
fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress );
|
||||
/* determine the offset and end offset of the access within each failed stripe unit. */
|
||||
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector );
|
||||
fsuend[0] = fsuoff[0] + pda0->numSector;
|
||||
fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector );
|
||||
fsuend[1] = fsuoff[1] + pda1->numSector;
|
||||
/* determine the startSector to begin decoding */
|
||||
startSector = RF_MIN( pda0->startSector, pda1->startSector );
|
||||
/* determine the endSector to end decoding */
|
||||
endSector = RF_MAX( fsuend[0], fsuend[1] );
|
||||
}
|
||||
/*
|
||||
assign the beginning sector and the end sector for each parameter
|
||||
find out the corresponding colume # for each parameter
|
||||
*/
|
||||
for( prm=0; prm < ndataParam; prm++ ) {
|
||||
pda = node->params[prm].p;
|
||||
suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
|
||||
suend[prm] = suoff[prm] + pda->numSector;
|
||||
prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
}
|
||||
/* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU,
|
||||
find out the corresponding parameters that cover the current sector and that are needed for
|
||||
decoding of this sector in failed SU. 2. Find out if sector is in the shadow of any accessed
|
||||
failed SU. If not, malloc a temporary space of a sector in size.
|
||||
*/
|
||||
for( sector = startSector; sector < endSector; sector++ ){
|
||||
if ( nresults == 2 )
|
||||
if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue;
|
||||
for( prm=0; prm < ndataParam; prm++ )
|
||||
if( suoff[prm] <= sector && sector < suend[prm] )
|
||||
buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr +
|
||||
rf_RaidAddressToByte(raidPtr, sector-suoff[prm]);
|
||||
/* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point
|
||||
at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of
|
||||
a sector in size for destination of decoding.
|
||||
*/
|
||||
RF_ASSERT( nresults == 1 || nresults == 2 );
|
||||
if ( nresults == 1) {
|
||||
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
|
||||
/* Always malloc temp buffer to dest[1] */
|
||||
RF_Malloc( dest[1], bytesPerSector, (char *) );
|
||||
bzero(dest[1],bytesPerSector); mallc_two = 1; }
|
||||
else {
|
||||
if( fsuoff[0] <= sector && sector < fsuend[0] )
|
||||
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
|
||||
else { RF_Malloc( dest[0], bytesPerSector, (char *) );
|
||||
bzero(dest[0],bytesPerSector); mallc_one = 1; }
|
||||
if( fsuoff[1] <= sector && sector < fsuend[1] )
|
||||
dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]);
|
||||
else { RF_Malloc( dest[1], bytesPerSector, (char *) );
|
||||
bzero(dest[1],bytesPerSector); mallc_two = 1; }
|
||||
RF_ASSERT( mallc_one == 0 || mallc_two == 0 );
|
||||
}
|
||||
pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff );
|
||||
ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff );
|
||||
/*
|
||||
* After finish finding all needed sectors, call doubleEOdecode function for decoding
|
||||
* one sector to destination.
|
||||
*/
|
||||
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
|
||||
/* free all allocated memory, and mark flag to indicate no memory is being allocated */
|
||||
if( mallc_one == 1) RF_Free( dest[0], bytesPerSector );
|
||||
if( mallc_two == 1) RF_Free( dest[1], bytesPerSector );
|
||||
mallc_one = mallc_two = 0;
|
||||
}
|
||||
RF_Free(buf, numDataCol*sizeof(char *));
|
||||
if (ndataParam != 0){
|
||||
RF_Free(suoff, ndataParam*sizeof(long));
|
||||
RF_Free(suend, ndataParam*sizeof(long));
|
||||
RF_Free(prmToCol, ndataParam*sizeof(long));
|
||||
}
|
||||
|
||||
RF_ETIMER_STOP(timer);
|
||||
RF_ETIMER_EVAL(timer);
|
||||
if (tracerec) {
|
||||
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
||||
}
|
||||
rf_GenericWakeupFunc(node,0);
|
||||
#if 1
|
||||
return(0); /* XXX is this even close!!?!?!!? GO */
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* currently, only access of one of the two failed SU is allowed in this function.
|
||||
* also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
|
||||
* many accesses of single stripe unit.
|
||||
*/
|
||||
|
||||
int rf_EOWriteDoubleRecoveryFunc(node)
|
||||
RF_DagNode_t *node;
|
||||
{
|
||||
int np = node->numParams;
|
||||
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
|
||||
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
|
||||
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
|
||||
RF_SectorNum_t sector;
|
||||
RF_RowCol_t col, scol;
|
||||
int prm, i, j;
|
||||
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
|
||||
unsigned sosAddr;
|
||||
unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
|
||||
RF_int64 numbytes;
|
||||
RF_SectorNum_t startSector, endSector;
|
||||
RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda;
|
||||
RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
|
||||
char **buf; /* buf[0], buf[1], buf[2], ...etc. point to buffer storing data read from col0, col1, col2 */
|
||||
char *ebuf, *pbuf, *dest[2], *olddata[2];
|
||||
RF_Etimer_t timer;
|
||||
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
|
||||
|
||||
RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */
|
||||
RF_ETIMER_START(timer);
|
||||
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
|
||||
|
||||
ppda = node->results[0]; /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda */
|
||||
epda = node->results[1];
|
||||
fpda = asmap->failedPDAs[0];
|
||||
|
||||
/* First, recovery the failed old SU using EvenOdd double decoding */
|
||||
/* determine the startSector and endSector for decoding */
|
||||
startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector );
|
||||
endSector = startSector + fpda->numSector;
|
||||
/* Assign buf[col] pointers to point to each non-failed colume and initialize the pbuf
|
||||
and ebuf to point at the beginning of each source buffers and destination buffers */
|
||||
for( prm=0; prm < numDataCol-2; prm++ ) {
|
||||
pda = (RF_PhysDiskAddr_t *)node->params[prm].p;
|
||||
col = rf_EUCol(layoutPtr, pda->raidAddress );
|
||||
buf[col] = pda->bufPtr;
|
||||
}
|
||||
/* pbuf and ebuf: they will change values as double recovery decoding goes on */
|
||||
pbuf = ppda->bufPtr;
|
||||
ebuf = epda->bufPtr;
|
||||
/* find out the logical colume numbers in the encoding matrix of the two failed columes */
|
||||
fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress );
|
||||
|
||||
/* find out the other failed colume not accessed this time */
|
||||
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
||||
for (i=0; i < numDataCol; i++) {
|
||||
npda.raidAddress = sosAddr + (i * secPerSU);
|
||||
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
|
||||
/* skip over dead disks */
|
||||
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
|
||||
if (i != fcol[0]) break;
|
||||
}
|
||||
RF_ASSERT (i < numDataCol);
|
||||
fcol[1] = i;
|
||||
/* assign temporary space to put recovered failed SU */
|
||||
numbytes = fpda->numSector * bytesPerSector;
|
||||
RF_Malloc(olddata[0], numbytes, (char *) );
|
||||
RF_Malloc(olddata[1], numbytes, (char *) );
|
||||
dest[0] = olddata[0];
|
||||
dest[1] = olddata[1];
|
||||
bzero(olddata[0], numbytes);
|
||||
bzero(olddata[1], numbytes);
|
||||
/* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] have already
|
||||
pointed at the beginning of each source buffers and destination buffers */
|
||||
for( sector = startSector, i=0; sector < endSector; sector++ , i++){
|
||||
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
|
||||
for (j=0; j < numDataCol; j++)
|
||||
if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector;
|
||||
dest[0] += bytesPerSector;
|
||||
dest[1] += bytesPerSector;
|
||||
ebuf += bytesPerSector;
|
||||
pbuf += bytesPerSector;
|
||||
}
|
||||
/* after recovery, the buffer pointed by olddata[0] is the old failed data.
|
||||
With new writing data and this old data, use small write to calculate
|
||||
the new redundant informations
|
||||
*/
|
||||
/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd;
|
||||
params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ;
|
||||
params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1]
|
||||
are Pdas of wudNodes;
|
||||
For current implementation, we assume the simplest case:
|
||||
asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1
|
||||
then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data
|
||||
into the old recovered data, then do the same things as small write.
|
||||
*/
|
||||
|
||||
rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
|
||||
/* do new 'E' calculation */
|
||||
/* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
|
||||
scol = rf_EUCol(layoutPtr, fpda->raidAddress );
|
||||
/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer */
|
||||
rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
|
||||
|
||||
/* do new 'P' calculation */
|
||||
rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
|
||||
/* Free the allocated buffer */
|
||||
RF_Free( olddata[0], numbytes );
|
||||
RF_Free( olddata[1], numbytes );
|
||||
RF_Free( buf, numDataCol*sizeof(char *));
|
||||
|
||||
RF_ETIMER_STOP(timer);
|
||||
RF_ETIMER_EVAL(timer);
|
||||
if (tracerec) {
|
||||
tracerec->q_us += RF_ETIMER_VAL_US(timer);
|
||||
}
|
||||
|
||||
rf_GenericWakeupFunc(node,0);
|
||||
return(0);
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* rf_evenodd_dagfuncs.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Chang-Ming Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_EVENODD_DAGFUNCS_H_
|
||||
#define _RF__RF_EVENODD_DAGFUNCS_H_
|
||||
|
||||
extern RF_RedFuncs_t rf_EOSmallWriteEFuncs;
|
||||
extern RF_RedFuncs_t rf_EOSmallWritePFuncs;
|
||||
extern RF_RedFuncs_t rf_eoERecoveryFuncs;
|
||||
extern RF_RedFuncs_t rf_eoPRecoveryFuncs;
|
||||
extern RF_RedFuncs_t rf_eoERecoveryFuncs;
|
||||
|
||||
int rf_RegularPEFunc(RF_DagNode_t *node);
|
||||
int rf_RegularONEFunc(RF_DagNode_t *node);
|
||||
int rf_SimpleONEFunc(RF_DagNode_t *node);
|
||||
void rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf);
|
||||
int rf_RegularEFunc(RF_DagNode_t *node);
|
||||
void rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf);
|
||||
int rf_Degraded_100_EOFunc(RF_DagNode_t *node);
|
||||
void rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
|
||||
RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector);
|
||||
void rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol,
|
||||
char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector);
|
||||
int rf_RecoveryEFunc(RF_DagNode_t *node);
|
||||
int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node);
|
||||
void rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
|
||||
RF_RowCol_t *fcol, char *pbuf, char *ebuf);
|
||||
int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node);
|
||||
int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node);
|
||||
|
||||
/*
 * Logical column of a RAID address inside its stripe: the address modulo the
 * number of data sectors per stripe, divided by the stripe-unit size.
 * The whole expansion is parenthesized so that the macro can be used inside
 * a larger expression.
 */
#define rf_EUCol(_layoutPtr_, _addr_) \
	((((_addr_) % ((_layoutPtr_)->dataSectorsPerStripe))) / ((_layoutPtr_)->sectorsPerStripeUnit))

/*
 * Non-negative remainder of _int1_ modulo _int2_ (_int2_ > 0), also for
 * negative _int1_ of any magnitude.  Both arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define rf_EO_Mod(_int1_, _int2_) \
	(((_int1_) < 0) ? ((((_int1_) % (_int2_)) + (_int2_)) % (_int2_)) : ((_int1_) % (_int2_)))

/*
 * First multiple of sec_per_eu that is strictly greater than _offset_
 * rounded down to a multiple of sec_per_eu.
 */
#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) \
	((((_offset_) / (sec_per_eu)) + 1) * (sec_per_eu))

/*
 * RF_EO_MATRIX_DIM should be a prime number, and "bytesPerSector" should be
 * divisible by (RF_EO_MATRIX_DIM - 1) so that a sector is fully encoded and
 * its space fully utilized.  The value 17 is one such choice; it does not
 * apply to disk arrays with more than 17 columns in total.
 */
#define RF_EO_MATRIX_DIM 17
|
||||
|
||||
#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */
|
|
@ -0,0 +1,198 @@
|
|||
/* $NetBSD: rf_evenodd_dags.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* rf_evenodd_dags.c
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Chang-Ming Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
|
||||
#if RF_INCLUDE_EVENODD > 0
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_acctrace.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_evenodd_dags.h"
|
||||
#include "rf_evenodd.h"
|
||||
#include "rf_evenodd_dagfuncs.h"
|
||||
#include "rf_pq.h"
|
||||
#include "rf_dagdegrd.h"
|
||||
#include "rf_dagdegwr.h"
|
||||
#include "rf_dagffwr.h"
|
||||
|
||||
|
||||
/*
|
||||
* Lost one data.
|
||||
* Use P to reconstruct missing data.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG)
|
||||
{
|
||||
rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lost data + E.
|
||||
* Use P to reconstruct missing data.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG)
|
||||
{
|
||||
rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoPRecoveryFuncs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lost data + P.
|
||||
* Make E look like P, and use Eor for Xor, and we can
|
||||
* use degraded read DAG.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG)
|
||||
{
|
||||
RF_PhysDiskAddr_t *temp;
|
||||
/* swap P and E pointers to fake out the DegradedReadDAG code */
|
||||
temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp;
|
||||
rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_eoERecoveryFuncs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lost two data.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG)
|
||||
{
|
||||
rf_EO_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lost two data.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG)
|
||||
{
|
||||
rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG)
|
||||
{
|
||||
if (asmap->numStripeUnitsAccessed != 1 &&
|
||||
asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit)
|
||||
RF_PANIC();
|
||||
rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, (int (*)(RF_DagNode_t *))rf_Degraded_100_EOFunc, RF_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* E is dead. Small write.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG)
|
||||
{
|
||||
rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWritePFuncs, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* E is dead. Large write.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG)
|
||||
{
|
||||
rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularPFunc, RF_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* P is dead. Small write.
|
||||
* Swap E + P, use single-degraded stuff.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG)
|
||||
{
|
||||
RF_PhysDiskAddr_t *temp;
|
||||
/* swap P and E pointers to fake out the DegradedReadDAG code */
|
||||
temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp;
|
||||
rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_EOSmallWriteEFuncs, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* P is dead. Large write.
|
||||
* Swap E + P, use single-degraded stuff.
|
||||
*/
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG)
|
||||
{
|
||||
RF_PhysDiskAddr_t *temp;
|
||||
/* swap P and E pointers to fake out the code */
|
||||
temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp;
|
||||
rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularEFunc, RF_FALSE);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG)
|
||||
{
|
||||
rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
|
||||
RF_IO_TYPE_WRITE);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG)
|
||||
{
|
||||
RF_PhysDiskAddr_t *temp;
|
||||
|
||||
if (asmap->numStripeUnitsAccessed != 1 &&
|
||||
asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit)
|
||||
{
|
||||
RF_PANIC();
|
||||
}
|
||||
/* swap P and E to fake out parity code */
|
||||
temp = asmap->parityInfo; asmap->parityInfo = asmap->qInfo; asmap->qInfo = temp;
|
||||
rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,1, (int (*)(RF_DagNode_t *))rf_EO_DegradedWriteEFunc, RF_FALSE);
|
||||
/* is the regular E func the right one to call? */
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG)
|
||||
{
|
||||
if (asmap->numStripeUnitsAccessed != 1 &&
|
||||
asmap->failedPDAs[0]->numSector != raidPtr->Layout.sectorsPerStripeUnit)
|
||||
RF_PANIC();
|
||||
rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList,1, rf_RecoveryXorFunc, RF_TRUE);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead)
|
||||
{
|
||||
rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList,
|
||||
"Re", "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG)
|
||||
{
|
||||
rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG)
|
||||
{
|
||||
rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPEFunc, RF_FALSE);
|
||||
}
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG)
|
||||
{
|
||||
rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList, "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc);
|
||||
}
|
||||
|
||||
#endif /* RF_INCLUDE_EVENODD > 0 */
|
|
@ -0,0 +1,63 @@
|
|||
/* $NetBSD: rf_evenodd_dags.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* rf_evenodd_dags.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Chang-Ming Wu
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_EVENODD_DAGS_H_
|
||||
#define _RF__RF_EVENODD_DAGS_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
#include "rf_dag.h"
|
||||
|
||||
/* extern decl's of the failure mode EO functions.
|
||||
* swiped from rf_pqdeg.h
|
||||
*/
|
||||
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG);
|
||||
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG);
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
|
||||
#endif /* !_RF__RF_EVENODD_DAGS_H_ */
|
|
@ -0,0 +1,370 @@
|
|||
/* $NetBSD: rf_fifo.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/***************************************************
|
||||
*
|
||||
* rf_fifo.c -- prioritized fifo queue code.
|
||||
* There are only two priority levels: hi and lo.
|
||||
*
|
||||
* Aug 4, 1994, adapted from raidSim version (MCH)
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_fifo.c,v
|
||||
* Revision 1.20 1996/06/18 20:53:11 jimz
|
||||
* fix up disk queueing (remove configure routine,
|
||||
* add shutdown list arg to create routines)
|
||||
*
|
||||
* Revision 1.19 1996/06/14 00:08:21 jimz
|
||||
* make happier in all environments
|
||||
*
|
||||
* Revision 1.18 1996/06/13 20:41:24 jimz
|
||||
* add random queueing
|
||||
*
|
||||
* Revision 1.17 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.16 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.15 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.14 1996/06/06 01:15:02 jimz
|
||||
* added debugging
|
||||
*
|
||||
* Revision 1.13 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.12 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.11 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.10 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.9 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.8 1995/12/01 18:22:15 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.7 1995/11/07 15:32:16 wvcii
|
||||
* added function FifoPeek()
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_alloclist.h"
|
||||
#include "rf_stripelocks.h"
|
||||
#include "rf_layout.h"
|
||||
#include "rf_diskqueue.h"
|
||||
#include "rf_fifo.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_options.h"
|
||||
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
#include "rf_randmacros.h"
|
||||
RF_DECLARE_STATIC_RANDOM
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
|
||||
/* just malloc a header, zero it (via calloc), and return it */
|
||||
/*ARGSUSED*/
|
||||
void *rf_FifoCreate(sectPerDisk, clList, listp)
|
||||
RF_SectorCount_t sectPerDisk;
|
||||
RF_AllocListElem_t *clList;
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
RF_FifoHeader_t *q;
|
||||
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
RF_INIT_STATIC_RANDOM(1);
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList);
|
||||
q->hq_count = q->lq_count = 0;
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
q->rval = (long)RF_STATIC_RANDOM();
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
return((void *)q);
|
||||
}
|
||||
|
||||
void rf_FifoEnqueue(q_in, elem, priority)
|
||||
void *q_in;
|
||||
RF_DiskQueueData_t *elem;
|
||||
int priority;
|
||||
{
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *)q_in;
|
||||
|
||||
RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY);
|
||||
|
||||
elem->next = NULL;
|
||||
if (priority == RF_IO_NORMAL_PRIORITY) {
|
||||
if (!q->hq_tail) {
|
||||
RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL);
|
||||
q->hq_head = q->hq_tail = elem;
|
||||
} else {
|
||||
RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL);
|
||||
q->hq_tail->next = elem;
|
||||
q->hq_tail = elem;
|
||||
}
|
||||
q->hq_count++;
|
||||
}
|
||||
else {
|
||||
RF_ASSERT(elem->next == NULL);
|
||||
if (rf_fifoDebug) {
|
||||
int tid;
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] fifo: ENQ lopri\n", tid);
|
||||
}
|
||||
if (!q->lq_tail) {
|
||||
RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL);
|
||||
q->lq_head = q->lq_tail = elem;
|
||||
} else {
|
||||
RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL);
|
||||
q->lq_tail->next = elem;
|
||||
q->lq_tail = elem;
|
||||
}
|
||||
q->lq_count++;
|
||||
}
|
||||
if ((q->hq_count + q->lq_count)!= elem->queue->queueLength) {
|
||||
printf("Queue lengths differ!: %d %d %d\n",
|
||||
q->hq_count, q->lq_count, (int)elem->queue->queueLength);
|
||||
printf("%d %d %d %d\n",
|
||||
(int)elem->queue->numOutstanding,
|
||||
(int)elem->queue->maxOutstanding,
|
||||
(int)elem->queue->row,
|
||||
(int)elem->queue->col);
|
||||
}
|
||||
RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength);
|
||||
}
|
||||
|
||||
RF_DiskQueueData_t *rf_FifoDequeue(q_in)
|
||||
void *q_in;
|
||||
{
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
|
||||
RF_DiskQueueData_t *nd;
|
||||
|
||||
RF_ASSERT(q);
|
||||
if (q->hq_head) {
|
||||
RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL);
|
||||
nd = q->hq_head; q->hq_head = q->hq_head->next;
|
||||
if (!q->hq_head) q->hq_tail = NULL;
|
||||
nd->next = NULL;
|
||||
q->hq_count--;
|
||||
} else if (q->lq_head) {
|
||||
RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL);
|
||||
nd = q->lq_head; q->lq_head = q->lq_head->next;
|
||||
if (!q->lq_head) q->lq_tail = NULL;
|
||||
nd->next = NULL;
|
||||
q->lq_count--;
|
||||
if (rf_fifoDebug) {
|
||||
int tid;
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] fifo: DEQ lopri %lx\n", tid, (long)nd);
|
||||
}
|
||||
} else {
|
||||
RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL);
|
||||
nd = NULL;
|
||||
}
|
||||
return(nd);
|
||||
}
|
||||
|
||||
/*
 * n_in_q -- locate the n-th element (0-based) of a singly linked request
 * list and optionally unlink it.
 *
 *   headp, tailp  addresses of the list's head and tail pointers
 *   countp        address of the list's element counter
 *   n             index of the wanted element; asserted to be in range
 *   deq           0: only return the element (peek)
 *                 non-zero: also unlink it and decrement *countp
 *
 * Returns the selected element.  The element's own next pointer is left
 * untouched when it is unlinked.
 *
 * This helper is used only by rf_RandomPeek() and rf_RandomDequeue(), so it
 * is compiled under exactly the same condition as they are; guarding it with
 * a different condition can leave those callers without a definition.
 */
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0

static RF_DiskQueueData_t *
n_in_q(RF_DiskQueueData_t **headp, RF_DiskQueueData_t **tailp, int *countp,
    int n, int deq)
{
	RF_DiskQueueData_t *r, *s;
	int i;

	/* walk n links; s trails one element behind r */
	for (s = NULL, i = n, r = *headp; r; s = r, r = r->next) {
		if (i == 0)
			break;
		i--;
	}
	RF_ASSERT(r != NULL);
	if (deq == 0)
		return (r);

	/* unlink r from its predecessor (or from the head) */
	if (s) {
		s->next = r->next;
	} else {
		*headp = r->next;
	}
	if (*tailp == r)
		*tailp = s;	/* removed the last element */
	(*countp)--;
	return (r);
}

#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
RF_DiskQueueData_t *rf_RandomPeek(q_in)
|
||||
void *q_in;
|
||||
{
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
|
||||
RF_DiskQueueData_t *req;
|
||||
int n;
|
||||
|
||||
if (q->hq_head) {
|
||||
n = q->rval % q->hq_count;
|
||||
req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 0);
|
||||
}
|
||||
else {
|
||||
RF_ASSERT(q->hq_count == 0);
|
||||
if (q->lq_head == NULL) {
|
||||
RF_ASSERT(q->lq_count == 0);
|
||||
return(NULL);
|
||||
}
|
||||
n = q->rval % q->lq_count;
|
||||
req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 0);
|
||||
}
|
||||
RF_ASSERT((q->hq_count + q->lq_count) == req->queue->queueLength);
|
||||
RF_ASSERT(req != NULL);
|
||||
return(req);
|
||||
}
|
||||
|
||||
RF_DiskQueueData_t *rf_RandomDequeue(q_in)
|
||||
void *q_in;
|
||||
{
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
|
||||
RF_DiskQueueData_t *req;
|
||||
int n;
|
||||
|
||||
if (q->hq_head) {
|
||||
n = q->rval % q->hq_count;
|
||||
q->rval = (long)RF_STATIC_RANDOM();
|
||||
req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 1);
|
||||
}
|
||||
else {
|
||||
RF_ASSERT(q->hq_count == 0);
|
||||
if (q->lq_head == NULL) {
|
||||
RF_ASSERT(q->lq_count == 0);
|
||||
return(NULL);
|
||||
}
|
||||
n = q->rval % q->lq_count;
|
||||
q->rval = (long)RF_STATIC_RANDOM();
|
||||
req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 1);
|
||||
}
|
||||
RF_ASSERT((q->hq_count + q->lq_count) == (req->queue->queueLength-1));
|
||||
return(req);
|
||||
}
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
|
||||
/* Return ptr to item at head of queue. Used to examine request
|
||||
* info without actually dequeueing the request.
|
||||
*/
|
||||
RF_DiskQueueData_t *rf_FifoPeek(void *q_in)
|
||||
{
|
||||
RF_DiskQueueData_t *headElement = NULL;
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
|
||||
|
||||
RF_ASSERT(q);
|
||||
if (q->hq_head)
|
||||
headElement = q->hq_head;
|
||||
else if (q->lq_head)
|
||||
headElement = q->lq_head;
|
||||
return(headElement);
|
||||
}
|
||||
|
||||
/* We sometimes need to promote a low priority access to a regular priority access.
|
||||
* Currently, this is only used when the user wants to write a stripe which is currently
|
||||
* under reconstruction.
|
||||
* This routine will promote all accesses tagged with the indicated parityStripeID from
|
||||
* the low priority queue to the end of the normal priority queue.
|
||||
* We assume the queue is locked upon entry.
|
||||
*/
|
||||
int rf_FifoPromote(q_in, parityStripeID, which_ru)
|
||||
void *q_in;
|
||||
RF_StripeNum_t parityStripeID;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
{
|
||||
RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
|
||||
RF_DiskQueueData_t *lp = q->lq_head, *pt = NULL; /* lp = lo-pri queue pointer, pt = trailer */
|
||||
int retval = 0;
|
||||
|
||||
while (lp) {
|
||||
|
||||
/* search for the indicated parity stripe in the low-pri queue */
|
||||
if (lp->parityStripeID == parityStripeID && lp->which_ru == which_ru) {
|
||||
/*printf("FifoPromote: promoting access for psid %ld\n",parityStripeID);*/
|
||||
if (pt) pt->next = lp->next; /* delete an entry other than the first */
|
||||
else q->lq_head = lp->next; /* delete the head entry */
|
||||
|
||||
if (!q->lq_head) q->lq_tail = NULL; /* we deleted the only entry */
|
||||
else if (lp == q->lq_tail) q->lq_tail = pt; /* we deleted the tail entry */
|
||||
|
||||
lp->next = NULL;
|
||||
q->lq_count--;
|
||||
|
||||
if (q->hq_tail) {q->hq_tail->next = lp; q->hq_tail = lp;} /* append to hi-priority queue */
|
||||
else {q->hq_head = q->hq_tail = lp;}
|
||||
q->hq_count++;
|
||||
|
||||
/*UpdateShortestSeekFinishTimeForced(lp->requestPtr, lp->diskState);*/ /* deal with this later, if ever */
|
||||
|
||||
lp = (pt) ? pt->next : q->lq_head; /* reset low-pri pointer and continue */
|
||||
retval++;
|
||||
|
||||
} else {pt = lp; lp = lp->next;}
|
||||
}
|
||||
|
||||
/* sanity check. delete this if you ever put more than one entry in the low-pri queue */
|
||||
RF_ASSERT(retval == 0 || retval == 1);
|
||||
if (rf_fifoDebug) {
|
||||
int tid;
|
||||
rf_get_threadid(tid);
|
||||
printf("[%d] fifo: promote %d\n", tid, retval);
|
||||
}
|
||||
return(retval);
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
/* $NetBSD: rf_fifo.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_fifo.h -- prioritized FIFO queue code.
|
||||
*
|
||||
* 4-9-93 Created (MCH)
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_fifo.h,v
|
||||
* Revision 1.12 1996/06/18 20:53:11 jimz
|
||||
* fix up disk queueing (remove configure routine,
|
||||
* add shutdown list arg to create routines)
|
||||
*
|
||||
* Revision 1.11 1996/06/13 20:41:28 jimz
|
||||
* add random queueing
|
||||
*
|
||||
* Revision 1.10 1996/06/13 20:38:28 jimz
|
||||
* add random dequeue, peek
|
||||
*
|
||||
* Revision 1.9 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.8 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.7 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.6 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.5 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.4 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.3 1995/12/01 18:22:26 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.2 1995/11/07 15:31:57 wvcii
|
||||
* added Peek() function
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_FIFO_H_
|
||||
#define _RF__RF_FIFO_H_
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_diskqueue.h"
|
||||
|
||||
typedef struct RF_FifoHeader_s {
|
||||
RF_DiskQueueData_t *hq_head, *hq_tail; /* high priority requests */
|
||||
RF_DiskQueueData_t *lq_head, *lq_tail; /* low priority requests */
|
||||
int hq_count, lq_count; /* debug only */
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
long rval; /* next random number (random qpolicy) */
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
} RF_FifoHeader_t;
|
||||
|
||||
extern void *rf_FifoCreate(RF_SectorCount_t sectPerDisk,
|
||||
RF_AllocListElem_t *clList, RF_ShutdownList_t **listp);
|
||||
extern void rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t *elem,
|
||||
int priority);
|
||||
extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in);
|
||||
extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in);
|
||||
extern int rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID,
|
||||
RF_ReconUnitNum_t which_ru);
|
||||
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
|
||||
extern RF_DiskQueueData_t *rf_RandomDequeue(void *q_in);
|
||||
extern RF_DiskQueueData_t *rf_RandomPeek(void *q_in);
|
||||
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
|
||||
|
||||
#endif /* !_RF__RF_FIFO_H_ */
|
|
@ -0,0 +1,733 @@
|
|||
/* $NetBSD: rf_freelist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* rf_freelist.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_freelist.h,v
|
||||
* Revision 1.13 1996/06/10 12:50:57 jimz
|
||||
* Add counters to freelists to track number of allocations, frees,
|
||||
* grows, max size, etc. Adjust a couple sets of PRIME params based
|
||||
* on the results.
|
||||
*
|
||||
* Revision 1.12 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.11 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.10 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.9 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.8 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.7 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.6 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.5 1996/05/20 16:16:12 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.4 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.3 1996/05/16 16:04:52 jimz
|
||||
* allow init func to fail for FREELIST ops
|
||||
*
|
||||
* Revision 1.2 1996/05/16 14:54:08 jimz
|
||||
* added _INIT and _CLEAN versions of ops for objects with
|
||||
* internal allocations
|
||||
*
|
||||
* Revision 1.1 1996/05/15 23:37:53 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* rf_freelist.h -- code to manage counted freelists
|
||||
*
|
||||
* Keep an arena of fixed-size objects. When a new object is needed,
|
||||
* allocate it as necessary. When an object is freed, either put it
|
||||
* in the arena, or really free it, depending on the maximum arena
|
||||
* size.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_FREELIST_H_
|
||||
#define _RF__RF_FREELIST_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_threadstuff.h"
|
||||
|
||||
#define RF_FREELIST_STATS 0
|
||||
|
||||
#if RF_FREELIST_STATS > 0
|
||||
/*
 * Per-freelist counters, only compiled in when RF_FREELIST_STATS > 0.
 * They are maintained by the RF_FREELIST_STAT_* macros.
 */
typedef struct RF_FreeListStats_s {
	char *file;		/* __FILE__ captured by RF_FREELIST_STAT_INIT */
	int line;		/* __LINE__ captured by RF_FREELIST_STAT_INIT */
	int allocations;	/* incremented by RF_FREELIST_STAT_ALLOC */
	int frees;		/* incremented by RF_FREELIST_STAT_FREE */
	int max_free;		/* largest free_cnt observed so far */
	int grows;		/* incremented by RF_FREELIST_STAT_GROW */
	int outstanding;	/* allocations not yet matched by a free */
	int max_outstanding;	/* largest value "outstanding" has reached */
} RF_FreeListStats_t;
|
||||
|
||||
/*
 * Clear every counter of the freelist and record the source position
 * at which this macro was expanded.
 */
#define RF_FREELIST_STAT_INIT(_fl_) { \
	bzero((char *)&((_fl_)->stats), sizeof((_fl_)->stats)); \
	(_fl_)->stats.line = __LINE__; \
	(_fl_)->stats.file = __FILE__; \
}
|
||||
|
||||
/*
 * Account for one object handed out: bump the allocation and
 * outstanding counters and track the outstanding high-water mark.
 */
#define RF_FREELIST_STAT_ALLOC(_fl_) { \
	(_fl_)->stats.outstanding++; \
	(_fl_)->stats.allocations++; \
	if ((_fl_)->stats.max_outstanding < (_fl_)->stats.outstanding) \
		(_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \
}
|
||||
|
||||
/* Raise the max_free high-water mark if free_cnt has passed it. */
#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \
	if ((_fl_)->stats.max_free < (_fl_)->free_cnt) \
		(_fl_)->stats.max_free = (_fl_)->free_cnt; \
}
|
||||
|
||||
/*
 * Account for one object given back: one more free, one fewer
 * outstanding, then refresh the max_free high-water mark.
 */
#define RF_FREELIST_STAT_FREE(_fl_) { \
	(_fl_)->stats.outstanding--; \
	(_fl_)->stats.frees++; \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
}
|
||||
|
||||
/* Count one arena grow and refresh the max_free high-water mark. */
#define RF_FREELIST_STAT_GROW(_fl_) { \
	++(_fl_)->stats.grows; \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
}
|
||||
|
||||
/*
 * Print every counter of the freelist, preceded by the file/line
 * recorded at init time and the text of the freelist expression.
 */
#define RF_FREELIST_STAT_REPORT(_fl_) { \
	printf("Freelist at %s %d (%s)\n", \
	    (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \
	printf(" %d allocations, %d frees\n", \
	    (_fl_)->stats.allocations, (_fl_)->stats.frees); \
	printf(" %d grows\n", (_fl_)->stats.grows); \
	printf(" %d outstanding\n", (_fl_)->stats.outstanding); \
	printf(" %d free (max)\n", (_fl_)->stats.max_free); \
	printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \
}
|
||||
|
||||
#else /* RF_FREELIST_STATS > 0 */
|
||||
|
||||
#define RF_FREELIST_STAT_INIT(_fl_)
|
||||
#define RF_FREELIST_STAT_ALLOC(_fl_)
|
||||
#define RF_FREELIST_STAT_FREE_UPDATE(_fl_)
|
||||
#define RF_FREELIST_STAT_FREE(_fl_)
|
||||
#define RF_FREELIST_STAT_GROW(_fl_)
|
||||
#define RF_FREELIST_STAT_REPORT(_fl_)
|
||||
|
||||
#endif /* RF_FREELIST_STATS > 0 */
|
||||
|
||||
struct RF_FreeList_s {
|
||||
void *objlist; /* list of free obj */
|
||||
int free_cnt; /* how many free obj */
|
||||
int max_free_cnt; /* max free arena size */
|
||||
int obj_inc; /* how many to allocate at a time */
|
||||
int obj_size; /* size of objects */
|
||||
RF_DECLARE_MUTEX(lock)
|
||||
#if RF_FREELIST_STATS > 0
|
||||
RF_FreeListStats_t stats; /* statistics */
|
||||
#endif /* RF_FREELIST_STATS > 0 */
|
||||
};
|
||||
|
||||
/*
 * Allocate and initialize an empty freelist.
 *
 * fl     = freelist (assigned; left NULL if the allocation or the
 *          mutex initialization fails)
 * maxcnt = max number of items in arena
 * inc    = how many to allocate at a time (must be > 0)
 * size   = size of object
 */
#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \
	int _rc; \
	RF_ASSERT((_inc_) > 0); \
	RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \
	if (_fl_) { \
		(_fl_)->objlist = NULL; \
		(_fl_)->free_cnt = 0; \
		(_fl_)->max_free_cnt = (_maxcnt_); \
		(_fl_)->obj_inc = (_inc_); \
		(_fl_)->obj_size = (_size_); \
		_rc = rf_mutex_init(&(_fl_)->lock); \
		if (_rc) { \
			RF_Free(_fl_, sizeof(RF_FreeList_t)); \
			_fl_ = NULL; \
		} \
		else { \
			/* only touch the stats of a list that still exists */ \
			RF_FREELIST_STAT_INIT(_fl_); \
		} \
	} \
}
|
||||
|
||||
/*
 * Pre-populate the arena with zero-filled objects.  Stops early, without
 * reporting it, at the first allocation that fails.
 *
 * fl    = freelist
 * cnt   = number to prime with
 * nextp = name of "next" pointer in obj
 * cast  = object cast
 */
#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \
	void *_np; \
	int _k; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	_k = 0; \
	while (_k < (_cnt_)) { \
		RF_Calloc(_np,1,(_fl_)->obj_size,(void *)); \
		if (!_np) \
			break; \
		/* push the fresh object on the front of the chain */ \
		(_cast_(_np))->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = _np; \
		(_fl_)->free_cnt++; \
		_k++; \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock)
|
||||
|
||||
#define RF_FREELIST_DO_UNLOCK(_fl_) { \
|
||||
RF_UNLOCK_MUTEX((_fl_)->lock); \
|
||||
}
|
||||
|
||||
#define RF_FREELIST_DO_LOCK(_fl_) { \
|
||||
RF_LOCK_MUTEX((_fl_)->lock); \
|
||||
}
|
||||
|
||||
/*
 * Pre-populate the arena with zero-filled objects, running an init
 * function on each one.  Stops early at the first allocation that fails
 * or the first object whose init function returns nonzero (that object
 * is released).
 *
 * fl    = freelist
 * cnt   = number to prime with
 * nextp = name of "next" pointer in obj
 * cast  = object cast
 * init  = func to call to init obj
 */
#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for(_i=0;_i<(_cnt_);_i++) { \
		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
		/* a failed allocation must never reach the init func */ \
		if (_p && _init_ (_cast_ _p)) { \
			RF_Free(_p,(_fl_)->obj_size); \
			_p = NULL; \
		} \
		if (_p) { \
			(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
			(_fl_)->objlist = _p; \
			(_fl_)->free_cnt++; \
		} \
		else { \
			break; \
		} \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Same as priming with an init function, except that the init function
 * receives an extra argument.  Stops early at the first allocation that
 * fails or the first object whose init function returns nonzero (that
 * object is released).
 *
 * fl    = freelist
 * cnt   = number to prime with
 * nextp = name of "next" pointer in obj
 * cast  = object cast
 * init  = func to call to init obj
 * arg   = arg to init obj func
 */
#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for(_i=0;_i<(_cnt_);_i++) { \
		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
		/* a failed allocation must never reach the init func */ \
		if (_p && _init_ (_cast_ _p,_arg_)) { \
			RF_Free(_p,(_fl_)->obj_size); \
			_p = NULL; \
		} \
		if (_p) { \
			(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
			(_fl_)->objlist = _p; \
			(_fl_)->free_cnt++; \
		} \
		else { \
			break; \
		} \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Obtain one object.  Objects taken from the arena are returned as they
 * were cached; when the arena is empty a fresh zero-filled object is
 * allocated and initialized, and up to obj_inc-1 further initialized
 * objects are stashed in the arena.  obj is NULL on failure.
 *
 * fl    = freelist
 * obj   = object to allocate
 * nextp = name of "next" pointer in obj
 * cast  = cast of obj assignment
 * init  = init obj func (nonzero return means failure)
 */
#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					/* never stash beyond the arena cap */ \
					if ((_fl_)->free_cnt >= (_fl_)->max_free_cnt) \
						break; \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p == NULL) \
						break; \
					if (_init_ (_cast_ _p)) { \
						RF_Free(_p,(_fl_)->obj_size); \
						_p = NULL; \
						break; \
					} \
					(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _p; \
					/* keep free_cnt in step with the chain */ \
					(_fl_)->free_cnt++; \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	if (_obj_) { \
		RF_FREELIST_STAT_ALLOC(_fl_); \
	} \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Obtain one object; like the init-function variant of get, except that
 * the init function receives an extra argument.  When the arena is empty
 * a fresh zero-filled object is allocated and initialized, and up to
 * obj_inc-1 further initialized objects are stashed in the arena.
 * obj is NULL on failure.
 *
 * fl    = freelist
 * obj   = object to allocate
 * nextp = name of "next" pointer in obj
 * cast  = cast of obj assignment
 * init  = init obj func (nonzero return means failure)
 * arg   = arg to init obj func
 */
#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_,_arg_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					/* never stash beyond the arena cap */ \
					if ((_fl_)->free_cnt >= (_fl_)->max_free_cnt) \
						break; \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p == NULL) \
						break; \
					if (_init_ (_cast_ _p,_arg_)) { \
						RF_Free(_p,(_fl_)->obj_size); \
						_p = NULL; \
						break; \
					} \
					(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _p; \
					/* keep free_cnt in step with the chain */ \
					(_fl_)->free_cnt++; \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	if (_obj_) { \
		RF_FREELIST_STAT_ALLOC(_fl_); \
	} \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Obtain one object with an init function, and return with the freelist
 * mutex still held: this macro takes the lock but never releases it, so
 * the caller has to (e.g. with RF_FREELIST_DO_UNLOCK).  When the arena is
 * empty a fresh zero-filled object is allocated and initialized, and up
 * to obj_inc-1 further initialized objects are stashed in the arena.
 * obj is NULL on failure.
 *
 * fl    = freelist
 * obj   = object to allocate
 * nextp = name of "next" pointer in obj
 * cast  = cast of obj assignment
 * init  = init obj func (nonzero return means failure)
 */
#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					/* never stash beyond the arena cap */ \
					if ((_fl_)->free_cnt >= (_fl_)->max_free_cnt) \
						break; \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p == NULL) \
						break; \
					if (_init_ (_cast_ _p)) { \
						RF_Free(_p,(_fl_)->obj_size); \
						_p = NULL; \
						break; \
					} \
					(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _p; \
					/* keep free_cnt in step with the chain */ \
					(_fl_)->free_cnt++; \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	if (_obj_) { \
		RF_FREELIST_STAT_ALLOC(_fl_); \
	} \
}
|
||||
|
||||
/*
 * Obtain one object.  It is popped from the arena when possible;
 * otherwise a fresh zero-filled object is allocated and up to obj_inc-1
 * further objects are stashed in the arena.  obj is NULL on failure.
 *
 * fl    = freelist
 * obj   = object to allocate
 * nextp = name of "next" pointer in obj
 * cast  = cast of obj assignment
 */
#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
				/* never stash beyond the arena cap */ \
				if ((_fl_)->free_cnt >= (_fl_)->max_free_cnt) \
					break; \
				RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
				if (_p == NULL) \
					break; \
				(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
				(_fl_)->objlist = _p; \
				/* keep free_cnt in step with the chain */ \
				(_fl_)->free_cnt++; \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	if (_obj_) { \
		RF_FREELIST_STAT_ALLOC(_fl_); \
	} \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Obtain num objects, chained through their "next" pointers.  On success
 * obj points at the head of the chain.  If any object cannot be obtained,
 * everything gathered so far is given back (cached while the arena has
 * room, released otherwise) and obj is left NULL.
 *
 * fl    = freelist
 * obj   = object to allocate
 * nextp = name of "next" pointer in obj
 * cast  = cast of obj assignment
 * num   = num objs to return
 */
#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
	void *_p, *_l, *_nx; \
	int _i, _n; \
	_l = NULL; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	for(_n=0;_n<(_num_);_n++) { \
		if ((_fl_)->objlist) { \
			_obj_ = _cast_((_fl_)->objlist); \
			(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
			(_fl_)->free_cnt--; \
		} \
		else { \
			/* \
			 * Allocate one at a time so we can free \
			 * one at a time without cleverness when arena \
			 * is full. \
			 */ \
			RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
			if (_obj_) { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					/* never stash beyond the arena cap */ \
					if ((_fl_)->free_cnt >= (_fl_)->max_free_cnt) \
						break; \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p == NULL) \
						break; \
					(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _p; \
					/* keep free_cnt in step with the chain */ \
					(_fl_)->free_cnt++; \
				} \
			} \
			RF_FREELIST_STAT_GROW(_fl_); \
		} \
		if (_obj_) { \
			/* link the new object in front of those gathered */ \
			(_cast_(_obj_))->_nextp_ = _l; \
			_l = _obj_; \
			RF_FREELIST_STAT_ALLOC(_fl_); \
		} \
		else { \
			/* \
			 * Out of memory: undo.  _l is NULL when nothing \
			 * was gathered yet, so this is safe on the very \
			 * first iteration as well. \
			 */ \
			while (_l) { \
				_nx = (void *)((_cast_(_l))->_nextp_); \
				if ((_fl_)->free_cnt < (_fl_)->max_free_cnt) { \
					(_cast_(_l))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _l; \
					(_fl_)->free_cnt++; \
				} \
				else { \
					RF_Free(_l,(_fl_)->obj_size); \
				} \
				RF_FREELIST_STAT_FREE(_fl_); \
				_l = _nx; \
			} \
			break; \
		} \
	} \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Give one object back.  It is cached on the arena unless the arena
 * already holds max_free_cnt objects, in which case it is released.
 *
 * fl    = freelist
 * obj   = object to free
 * nextp = name of "next" pointer in obj
 */
#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the chain */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->free_cnt++; \
		(_fl_)->objlist = (void *)(_obj_); \
	} \
	else { \
		/* arena full */ \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Give back a whole chain of objects linked through "next".  Each one
 * is cached or released exactly as a single free would do.  obj is NULL
 * afterwards.
 *
 * fl    = freelist
 * obj   = first object of the chain to free
 * nextp = name of "next" pointer in obj
 * cast  = object cast
 * num   = num to free (debugging: asserted against the chain length)
 */
#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
	void *_nxt; \
	int _freed; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for (_freed = 0; _obj_; _freed++) { \
		/* remember the successor before the link is reused */ \
		_nxt = (_cast_(_obj_))->_nextp_; \
		if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
			RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
			(_obj_)->_nextp_ = (_fl_)->objlist; \
			(_fl_)->free_cnt++; \
			(_fl_)->objlist = (void *)(_obj_); \
		} \
		else { \
			RF_Free(_obj_,(_fl_)->obj_size); \
		} \
		_obj_ = _nxt; \
		RF_FREELIST_STAT_FREE(_fl_); \
	} \
	RF_ASSERT(_freed==(_num_)); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Give one object back.  It is cached as-is while the arena has room;
 * once the arena holds max_free_cnt objects it is passed to the clean
 * function and released.
 *
 * fl    = freelist
 * obj   = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 */
#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the chain */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->free_cnt++; \
		(_fl_)->objlist = (void *)(_obj_); \
	} \
	else { \
		/* arena full: undo the init, then release */ \
		_clean_ (_obj_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Give one object back.  It is cached as-is while the arena has room;
 * once the arena holds max_free_cnt objects it is passed, together with
 * arg, to the clean function and released.
 *
 * fl    = freelist
 * obj   = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 * arg   = arg for undo func
 */
#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the chain */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->free_cnt++; \
		(_fl_)->objlist = (void *)(_obj_); \
	} \
	else { \
		/* arena full: undo the init, then release */ \
		_clean_ (_obj_,_arg_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
|
||||
|
||||
/*
 * Give one object back (cached while the arena has room, otherwise
 * cleaned and released) and return with the freelist mutex still held:
 * this macro takes the lock but never releases it.
 *
 * fl    = freelist
 * obj   = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 */
#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the chain */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->free_cnt++; \
		(_fl_)->objlist = (void *)(_obj_); \
	} \
	else { \
		/* arena full: undo the init, then release */ \
		_clean_ (_obj_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
}
|
||||
|
||||
/*
 * Tear a freelist down: report statistics, destroy the mutex, release
 * every object still cached on the arena and finally the freelist itself.
 *
 * fl    = freelist
 * nextp = name of "next" pointer in obj
 * cast  = cast to object type
 */
#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \
	void *_it, *_after; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_it = (_fl_)->objlist; \
	while (_it) { \
		/* fetch the successor before the object goes away */ \
		_after = (_cast_ _it)->_nextp_; \
		RF_Free(_it,(_fl_)->obj_size); \
		_it = _after; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
|
||||
|
||||
/*
 * Tear a freelist down: report statistics, destroy the mutex, pass every
 * object still cached on the arena to the clean function and release it,
 * then release the freelist itself.
 *
 * fl    = freelist
 * nextp = name of "next" pointer in obj
 * cast  = cast to object type
 * clean = func to undo obj init
 */
#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \
	void *_it, *_after; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_it = (_fl_)->objlist; \
	while (_it) { \
		/* fetch the successor before the object goes away */ \
		_after = (_cast_ _it)->_nextp_; \
		_clean_ (_it); \
		RF_Free(_it,(_fl_)->obj_size); \
		_it = _after; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
|
||||
|
||||
/*
 * Tear a freelist down: report statistics, destroy the mutex, pass every
 * object still cached on the arena (together with arg) to the clean
 * function and release it, then release the freelist itself.
 *
 * fl    = freelist
 * nextp = name of "next" pointer in obj
 * cast  = cast to object type
 * clean = func to undo obj init
 * arg   = arg for undo func
 */
#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \
	void *_it, *_after; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_it = (_fl_)->objlist; \
	while (_it) { \
		/* fetch the successor before the object goes away */ \
		_after = (_cast_ _it)->_nextp_; \
		_clean_ (_it,_arg_); \
		RF_Free(_it,(_fl_)->obj_size); \
		_it = _after; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
|
||||
|
||||
#endif /* !_RF__RF_FREELIST_H_ */
|
|
@ -0,0 +1,268 @@
|
|||
/* $NetBSD: rf_general.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* rf_general.h -- some general-use definitions
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_general.h,v
|
||||
* Revision 1.26 1996/08/09 16:44:57 jimz
|
||||
* sunos port
|
||||
*
|
||||
* Revision 1.25 1996/08/07 21:08:57 jimz
|
||||
* get NBPG defined for IRIX
|
||||
*
|
||||
* Revision 1.24 1996/08/06 22:02:06 jimz
|
||||
* include linux/user.h for linux to get NBPG
|
||||
*
|
||||
* Revision 1.23 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.22 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.21 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.20 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.19 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.18 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.17 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.16 1996/05/21 18:53:13 jimz
|
||||
* be sure that noop macros don't confuse conditionals and loops
|
||||
*
|
||||
* Revision 1.15 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.14 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.13 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.12 1995/12/01 18:29:08 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.11 1995/09/19 22:59:52 jimz
|
||||
* Add kernel macro RF_DKU_END_IO(). When DKUSAGE is not defined,
|
||||
* this is a no-op. When it is defined, it calls dku_end_io()
|
||||
* correctly given a raidframe unit number and a buf pointer.
|
||||
*
|
||||
* Revision 1.10 1995/07/03 18:13:56 holland
|
||||
* changed kernel defn of GETTIME
|
||||
*
|
||||
* Revision 1.9 1995/07/02 15:07:42 holland
|
||||
* bug fixes related to getting distributed sparing numbers
|
||||
*
|
||||
* Revision 1.8 1995/06/12 15:54:40 rachad
|
||||
* Added garbege collection for log structured storage
|
||||
*
|
||||
* Revision 1.7 1995/06/03 19:18:16 holland
|
||||
* changes related to kernelization: access traces
|
||||
* changes related to distributed sparing: some bug fixes
|
||||
*
|
||||
* Revision 1.6 1995/05/01 13:28:00 holland
|
||||
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
|
||||
*
|
||||
* Revision 1.5 1995/04/06 14:47:56 rachad
|
||||
* merge completed
|
||||
*
|
||||
* Revision 1.4 1995/03/15 20:45:23 holland
|
||||
* distr sparing changes.
|
||||
*
|
||||
* Revision 1.3 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.2 1994/11/29 21:37:10 danner
|
||||
* Added divide by zero check.
|
||||
*
|
||||
*/
|
||||
|
||||
/*#define NOASSERT*/
|
||||
|
||||
#ifndef _RF__RF_GENERAL_H_
|
||||
#define _RF__RF_GENERAL_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#if !defined(KERNEL) && !defined(NOASSERT)
|
||||
#include <assert.h>
|
||||
#endif /* !KERNEL && !NOASSERT */
|
||||
|
||||
/* error reporting and handling */
|
||||
|
||||
#ifndef KERNEL
|
||||
|
||||
#define RF_ERRORMSG(s) fprintf(stderr,(s))
|
||||
#define RF_ERRORMSG1(s,a) fprintf(stderr,(s),(a))
|
||||
#define RF_ERRORMSG2(s,a,b) fprintf(stderr,(s),(a),(b))
|
||||
#define RF_ERRORMSG3(s,a,b,c) fprintf(stderr,(s),(a),(b),(c))
|
||||
#define RF_ERRORMSG4(s,a,b,c,d) fprintf(stderr,(s),(a),(b),(c),(d))
|
||||
#define RF_ERRORMSG5(s,a,b,c,d,e) fprintf(stderr,(s),(a),(b),(c),(d),(e))
|
||||
#ifndef NOASSERT
|
||||
#define RF_ASSERT(x) {assert(x);}
|
||||
#else /* !NOASSERT */
|
||||
#define RF_ASSERT(x) {/*noop*/}
|
||||
#endif /* !NOASSERT */
|
||||
/*
 * Report the source position and abort.  stdout is flushed explicitly:
 * abort() is not required to flush open streams, so without the flush
 * the message can be lost whenever stdout is buffered.
 */
#define RF_PANIC() {printf("YIKES! Something terrible happened at line %d of file %s. Use a debugger.\n",__LINE__,__FILE__); fflush(stdout); abort();}
|
||||
|
||||
#else /* !KERNEL */
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
#include<sys/systm.h> /* printf, sprintf, and friends */
|
||||
#endif
|
||||
#define RF_ERRORMSG(s) printf((s))
|
||||
#define RF_ERRORMSG1(s,a) printf((s),(a))
|
||||
#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b))
|
||||
#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c))
|
||||
#define RF_ERRORMSG4(s,a,b,c,d) printf((s),(a),(b),(c),(d))
|
||||
#define RF_ERRORMSG5(s,a,b,c,d,e) printf((s),(a),(b),(c),(d),(e))
|
||||
#define perror(x)
|
||||
extern char rf_panicbuf[];
|
||||
/*
 * Format the source position into rf_panicbuf and panic with it.  The
 * buffer is handed to panic() as data, not as the format string, so a
 * '%' in the file name cannot be misread as a conversion.
 * NOTE(review): assumes panic() takes a printf-style format, as it
 * does on NetBSD -- confirm for any other kernel this is built on.
 */
#define RF_PANIC() {sprintf(rf_panicbuf,"raidframe error at line %d file %s",__LINE__,__FILE__); panic("%s", rf_panicbuf);}
|
||||
|
||||
#ifdef RF_ASSERT
|
||||
#undef RF_ASSERT
|
||||
#endif /* RF_ASSERT */
|
||||
#ifndef NOASSERT
|
||||
/*
 * Kernel assertion: when _x_ is false, format the source position and
 * the text of the failed expression into rf_panicbuf and panic.  The
 * buffer is handed to panic() as data, not as the format string;
 * otherwise an asserted expression containing '%' (e.g. "a % b == 0")
 * would be interpreted as conversion specifiers.
 * NOTE(review): assumes panic() takes a printf-style format, as it
 * does on NetBSD -- confirm for any other kernel this is built on.
 */
#define RF_ASSERT(_x_) { \
	if (!(_x_)) { \
		sprintf(rf_panicbuf, \
		    "raidframe error at line %d file %s (failed asserting %s)\n", \
		    __LINE__, __FILE__, #_x_); \
		panic("%s", rf_panicbuf); \
	} \
}
|
||||
#else /* !NOASSERT */
|
||||
#define RF_ASSERT(x) {/*noop*/}
|
||||
#endif /* !NOASSERT */
|
||||
|
||||
#endif /* !KERNEL */
|
||||
|
||||
/* random stuff */
|
||||
/* larger / smaller of two values; an argument may be evaluated twice */
#define RF_MAX(a,b) (((b) < (a)) ? (a) : (b))
#define RF_MIN(a,b) (((b) > (a)) ? (a) : (b))
|
||||
|
||||
/* guarded division: a/b, or 0 when b is zero (b is evaluated twice) */
#define RF_DB0_CHECK(a,b) ( ((b)!=0) ? (a)/(b) : 0 )
|
||||
|
||||
/* get time of day */
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
extern struct timeval time;
|
||||
#endif /* !__NetBSD__ */
|
||||
#define RF_GETTIME(_t) microtime(&(_t))
|
||||
#else /* KERNEL */
|
||||
/*
 * No trailing semicolon here: the caller supplies it, exactly as for
 * the kernel variant, so "if (c) RF_GETTIME(t); else ..." is valid.
 */
#define RF_GETTIME(_t) gettimeofday(&(_t), NULL)
|
||||
#endif /* KERNEL */
|
||||
|
||||
/*
|
||||
* zero memory- not all bzero calls go through here, only
|
||||
* those which in the kernel may have a user address
|
||||
*/
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#define RF_BZERO(_bp,_b,_l) if (IS_SYS_VA(_b)) bzero(_b,_l); else rf_BzeroWithRemap(_bp,_b,_l)
|
||||
#else
|
||||
|
||||
#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely incorrect. GO*/
|
||||
#endif /* __NetBSD__ */
|
||||
#else /* KERNEL */
|
||||
#define RF_BZERO(_bp,_b,_l) bzero(_b,_l)
|
||||
#endif /* KERNEL */
|
||||
|
||||
#ifdef sun
|
||||
#include <sys/param.h>
|
||||
#ifndef NBPG
|
||||
#define NBPG PAGESIZE
|
||||
#endif /* !NBPG */
|
||||
#endif /* sun */
|
||||
|
||||
#ifdef IRIX
|
||||
#include <sys/tfp.h>
|
||||
#define NBPG _PAGESZ
|
||||
#endif /* IRIX */
|
||||
|
||||
#ifdef LINUX
|
||||
#include <linux/user.h>
|
||||
#endif /* LINUX */
|
||||
|
||||
/* page arithmetic on addresses, in units of NBPG bytes */
#define RF_UL(x) ((unsigned long) (x))			/* value as unsigned long */
#define RF_PGMASK RF_UL(NBPG-1)				/* offset-within-page bits */
#define RF_BLIP(x) (NBPG - (RF_PGMASK & RF_UL(x)))	/* bytes left in page */
#define RF_PAGE_ALIGNED(x) (0 == (RF_PGMASK & RF_UL(x)))	/* nonzero iff on a page boundary */
|
||||
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <dkusage.h>
|
||||
#endif
|
||||
#if DKUSAGE > 0
|
||||
/*
 * Report the end of an I/O to the disk-usage code at splbio.
 *
 * _unit_ = raidframe unit number
 * _bp_   = buf pointer; b_flags selects the direction and b_bcount is
 *          passed through as the last argument
 *
 * The mask is parenthesized explicitly: "==" binds tighter than "&", so
 * without the parentheses the flags would be ANDed with the result of
 * the comparison instead of being masked first.
 * NOTE(review): the DKUSAGE == 0 fallback of this macro is declared with
 * a single parameter -- confirm which arity callers actually use.
 */
#define RF_DKU_END_IO(_unit_,_bp_) { \
	int _s = splbio(); \
	dku_end_io(DKU_RAIDFRAME_BUS, _unit_, 0, \
	    ((((_bp_)->b_flags & (B_READ|B_WRITE)) == B_READ) ? \
	    CAM_DIR_IN : CAM_DIR_OUT), \
	    (_bp_)->b_bcount); \
	splx(_s); \
}
|
||||
#else /* DKUSAGE > 0 */
|
||||
#define RF_DKU_END_IO(unit) { /* noop */ }
|
||||
#endif /* DKUSAGE > 0 */
|
||||
#endif /* KERNEL */
|
||||
|
||||
#ifdef __STDC__
|
||||
#define RF_STRING(_str_) #_str_
|
||||
#else /* __STDC__ */
|
||||
#define RF_STRING(_str_) "_str_"
|
||||
#endif /* __STDC__ */
|
||||
|
||||
#endif /* !_RF__RF_GENERAL_H_ */
|
|
@ -0,0 +1,199 @@
|
|||
/* $NetBSD: rf_geniq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Daniel Stodolsky
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_geniq.c
|
||||
* code which implements Reed-Solomon encoding for RAID level 6
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_geniq.c,v
|
||||
* Revision 1.12 1996/07/29 16:37:00 jimz
|
||||
* remove archs.h include to avoid VPATH problems in kernel
|
||||
* rf_invertq.c now must include archs.h before invertq.h
|
||||
*
|
||||
* Revision 1.11 1996/07/29 15:04:16 jimz
|
||||
* correct rf_archs.h path for kernel
|
||||
*
|
||||
* Revision 1.10 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.9 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.8 1996/07/15 17:22:18 jimz
|
||||
* nit-pick code cleanup
|
||||
* resolve stdlib problems on DEC OSF
|
||||
*
|
||||
* Revision 1.7 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.6 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.5 1995/12/01 18:29:18 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#define RF_UTILITY 1
|
||||
#include "rf_pqdeg.h"
|
||||
|
||||
/*
 * Advance a five-bit linear feedback shift register by one step.
 *
 * val  - current register contents; only bits 0-4 are examined
 * poly - feedback connections: where bit i is set, the bit shifted out
 *        of position 4 is fed into bit i of the result (XORed with the
 *        bit arriving from position i-1 when i > 0)
 *
 * Returns the new register contents, always in the range 0-31.
 */
int lsfr_shift(unsigned val, unsigned poly)
{
	unsigned high = (val >> 4) & 1;	/* bit shifted out of the top */
	unsigned next;
	unsigned i;

	/* bit 0 has no lower neighbour: it can only receive feedback */
	next = (poly & 1) ? high : 0;

	for (i = 1; i <= 4; i++) {
		unsigned bit = (val >> (i - 1)) & 1;

		if (poly & (1u << i))	/* there is a feedback connection */
			bit ^= high;
		next |= bit << i;
	}
	return (int)next;
}
|
||||
|
||||
/* generate Q matrices for the data */
|
||||
|
||||
RF_ua32_t rf_qfor[32];
|
||||
|
||||
void main()
|
||||
{
|
||||
unsigned int i,j,l,a,b;
|
||||
unsigned int val;
|
||||
unsigned int r;
|
||||
unsigned int m,p,q;
|
||||
|
||||
RF_ua32_t k;
|
||||
|
||||
printf("/*\n");
|
||||
printf(" * rf_invertq.h\n");
|
||||
printf(" */\n");
|
||||
printf("/*\n");
|
||||
printf(" * GENERATED FILE -- DO NOT EDIT\n");
|
||||
printf(" */\n");
|
||||
printf("\n");
|
||||
printf("#ifndef _RF__RF_INVERTQ_H_\n");
|
||||
printf("#define _RF__RF_INVERTQ_H_\n");
|
||||
printf("\n");
|
||||
printf("/*\n");
|
||||
printf(" * rf_geniq.c must include rf_archs.h before including\n");
|
||||
printf(" * this file (to get VPATH magic right with the way we\n");
|
||||
printf(" * generate this file in kernel trees)\n");
|
||||
printf(" */\n");
|
||||
printf("/* #include \"rf_archs.h\" */\n");
|
||||
printf("\n");
|
||||
printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n");
|
||||
printf("\n");
|
||||
printf("#define RF_Q_COLS 32\n");
|
||||
printf("RF_ua32_t rf_rn = {\n");
|
||||
k[0] = 1;
|
||||
for (j=0 ; j < 31; j++)
|
||||
k[j+1] = lsfr_shift(k[j],5);
|
||||
for (j=0; j < 32; j++)
|
||||
printf("%d, ",k[j]);
|
||||
printf("};\n");
|
||||
|
||||
printf("RF_ua32_t rf_qfor[32] = {\n");
|
||||
for (i=0; i < 32; i++)
|
||||
{
|
||||
printf("/* i = %d */ { 0, ",i);
|
||||
rf_qfor[i][0] = 0;
|
||||
for (j=1; j < 32; j++)
|
||||
{
|
||||
val = j;
|
||||
for (l=0; l < i; l++)
|
||||
val = lsfr_shift(val,5);
|
||||
rf_qfor[i][j] = val;
|
||||
printf("%d, ",val);
|
||||
}
|
||||
printf("},\n");
|
||||
}
|
||||
printf("};\n");
|
||||
printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n");
|
||||
|
||||
/* generate the inverse tables. (i,j,p,q) */
|
||||
/* The table just stores a. Get b back from
|
||||
the parity */
|
||||
printf("#ifdef KERNEL\n");
|
||||
printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n");
|
||||
printf("#elif defined(NO_PQ)\n");
|
||||
printf("RF_ua1024_t rf_qinv[29*29];\n");
|
||||
printf("#else /* !KERNEL && NO_PQ */\n");
|
||||
printf("RF_ua1024_t rf_qinv[29*29] = {\n");
|
||||
for (i=0; i < 29; i++)
|
||||
{
|
||||
for (j =0; j < 29; j++)
|
||||
{
|
||||
printf("/* i %d, j %d */{ ",i,j);
|
||||
if (i==j)
|
||||
for (l=0; l < 1023; l++) printf("0, ");
|
||||
else
|
||||
{
|
||||
for (p=0; p < 32; p++)
|
||||
for (q=0; q < 32; q++)
|
||||
{
|
||||
/* What are a, b such that
|
||||
a ^ b = p; and
|
||||
qfor[(28-i)][a ^ rf_rn[i+1]] ^ qfor[(28-j)][b ^ rf_rn[j+1]] = q.
|
||||
Solve by guessing a. Then testing.
|
||||
*/
|
||||
for ( a =0 ; a < 32; a++ )
|
||||
{
|
||||
b = a ^ p;
|
||||
if ( (rf_qfor[28-i][a^ k[i+1]] ^ rf_qfor[28-j][b ^ k[j+1]]) == q )
|
||||
break;
|
||||
}
|
||||
if (a == 32) printf("unable to solve %d %d %d %d\n",i,j,p,q);
|
||||
printf("%d,",a);
|
||||
}
|
||||
}
|
||||
printf("},\n");
|
||||
}
|
||||
}
|
||||
printf("};\n");
|
||||
printf("\n#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n\n");
|
||||
printf("#endif /* !KERNEL && NO_PQ */\n");
|
||||
printf("#endif /* !_RF__RF_INVERTQ_H_ */\n");
|
||||
exit(0);
|
||||
}
|
|
@ -0,0 +1,890 @@
|
|||
/* $NetBSD: rf_geometry.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Changes:
|
||||
* 10/24/91 Changes to support disk bus contention model
|
||||
* (MCH) 1. Added media_done_time param to Access_time()
|
||||
*
|
||||
* 08/18/92 Geometry routines have been modified to support zone-bit
|
||||
* recording.
|
||||
* (AS) 1. Each routine which originally referenced the variable
|
||||
* 'disk->geom->sectors_per_track' has been modified,
|
||||
* since the number of sectors per track varies on disks
|
||||
* with zone-bit recording.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_geometry.c,v
|
||||
* Revision 1.18 1996/08/11 00:40:57 jimz
|
||||
* fix up broken comment
|
||||
*
|
||||
* Revision 1.17 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.16 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.15 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.14 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.13 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.12 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.11 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.10 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.9 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.8 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.7 1995/12/01 18:29:34 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_geometry.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_debugMem.h"
|
||||
|
||||
#define DISK_DB "disk_db"
|
||||
#define DISK_NAME "HP2247"
|
||||
|
||||
#define ABS_DIFF(a,b) ( ((a)>(b)) ? ((a)-(b)) : ((b)-(a)) )
|
||||
|
||||
static RF_GeometryList_t *geom_list = (RF_GeometryList_t *) NULL;
|
||||
|
||||
RF_TICS_t rf_globalSpinup = 1.5;
|
||||
|
||||
#define NM_LGTH 80
|
||||
#define NM_PATN " %80s"
|
||||
|
||||
static RF_GeometryList_t *Fetch_geometry_db(FILE *fd);
|
||||
static void Format_disk(RF_DiskState_t *disk, long sectors_per_block);
|
||||
static long Find_cyl(RF_SectorNum_t block, RF_DiskState_t *disk);
|
||||
static long Find_track(RF_SectorNum_t block, RF_DiskState_t *disk);
|
||||
static long Find_phys_sector(RF_SectorNum_t block, RF_DiskState_t *disk);
|
||||
static RF_TICS_t Delay_to(RF_TICS_t cur_time, RF_SectorNum_t block,
|
||||
RF_DiskState_t *disk);
|
||||
static RF_TICS_t Seek_time(long to_cyl, long to_track, long from_cyl,
|
||||
long from_track, RF_DiskState_t *disk);
|
||||
static RF_TICS_t Seek(RF_TICS_t cur_time, RF_SectorNum_t block,
|
||||
RF_DiskState_t *disk, long update);
|
||||
static RF_TICS_t Rotate(RF_TICS_t cur_time, RF_SectorNum_t block,
|
||||
RF_DiskState_t *disk, long update);
|
||||
static RF_TICS_t Seek_Rotate(RF_TICS_t cur_time, RF_SectorNum_t block,
|
||||
RF_DiskState_t *disk, long update);
|
||||
static RF_TICS_t GAP(long sec_per_track, RF_DiskState_t *disk);
|
||||
static RF_TICS_t Block_access_time(RF_TICS_t cur_time, RF_SectorNum_t block,
|
||||
RF_SectorCount_t numblocks, RF_DiskState_t *disk, long update);
|
||||
static void Zero_stats(RF_DiskState_t *disk);
|
||||
static RF_TICS_t Update_stats(RF_TICS_t cur_time, RF_TICS_t seek, RF_TICS_t rotate,
|
||||
RF_TICS_t transfer, RF_DiskState_t *disk);
|
||||
static void rf_DiskParam(long numCyls, RF_TICS_t minSeek, RF_TICS_t avgSeek, RF_TICS_t maxSeek,
|
||||
RF_TICS_t *a, RF_TICS_t *b, RF_TICS_t *c);
|
||||
|
||||
static RF_GeometryList_t *Fetch_geometry_db(fd)
|
||||
FILE *fd;
|
||||
{
|
||||
long ret, lineno;
|
||||
char name[NM_LGTH], title[20];
|
||||
RF_GeometryList_t * list = (RF_GeometryList_t *) NULL,
|
||||
** next_ptr = & list;
|
||||
|
||||
if( RF_MAX_DISKNAME_LEN<NM_LGTH ) RF_PANIC();
|
||||
lineno = 0;
|
||||
while( (ret = fscanf( fd, " %20s", title )) != EOF ) {
|
||||
float tmp_f1, tmp_f2, tmp_f3, tmp_f4;
|
||||
float tmp_f5=0.0;
|
||||
float tmp_f6=0.0;
|
||||
RF_Geometry_t *g;
|
||||
long i, x, y, z, num_cylinders;
|
||||
RF_ZoneList_t ** znext_ptr;
|
||||
|
||||
if( ret == 1 && strncmp( "enddisk", title, 8 ) == 0 ) break;
|
||||
|
||||
RF_Calloc(*next_ptr, 1, sizeof(RF_GeometryList_t), (RF_GeometryList_t *));
|
||||
(*next_ptr)->next = (RF_GeometryList_t *) NULL;
|
||||
RF_Calloc(g, 1, sizeof(RF_Geometry_t), (RF_Geometry_t *));
|
||||
(*next_ptr)->disk = g;
|
||||
next_ptr = &( (*next_ptr)->next ); /*prep for next iteration */
|
||||
lineno++;
|
||||
if (fscanf( fd, NM_PATN, name ) != 1) {
|
||||
fprintf(stderr,"Disk DB Error: Can't get disk name from disk db\n");
|
||||
fprintf(stderr,"lineno=%d\n", lineno);
|
||||
fprintf(stderr,"name=\"%s\"\n", name);
|
||||
exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " tracks per cylinder %ld", &(g->tracks_per_cyl)) != 1) || g->tracks_per_cyl <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid tracks/cyl for disk %s\n", name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " number of disk zones %ld", &(g->num_zones)) != 1) || g->num_zones <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid number of zones for disk %s\n", name); exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This section of code creates the linked list which
|
||||
contains the disk's zone information. */
|
||||
g->zbr_data = (RF_ZoneList_t *) NULL;
|
||||
znext_ptr = &(g->zbr_data);
|
||||
num_cylinders = 0;
|
||||
|
||||
/* This for-loop reads in the cylinder count, the sectors
|
||||
per track, and track skew for each zone on the disk. */
|
||||
for (i=1; i <= g->num_zones; i++) {
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " number of cylinders in zone %ld", &x) != 1) || x < 1) {
|
||||
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid cyls/zone for disk %s\n", i, name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " sectors per track in zone %ld", &y) != 1) || y < 1 ) {
|
||||
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid sectors/track for disk %s\n", i, name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " track skew in zone %ld", &z) != 1) || z < 0 ) {
|
||||
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid track skew for disk %s\n",i, name); exit(1);
|
||||
}
|
||||
|
||||
RF_Calloc(*znext_ptr, 1, sizeof(RF_ZoneList_t), (RF_ZoneList_t *));
|
||||
(*znext_ptr)->next = (RF_ZoneList_t *) NULL;
|
||||
(*znext_ptr)->zone.num_cylinders = x;
|
||||
(*znext_ptr)->zone.sec_per_track = y;
|
||||
(*znext_ptr)->zone.track_skew = z;
|
||||
(*znext_ptr)->zone.num_sectors =
|
||||
(*znext_ptr)->zone.num_cylinders *
|
||||
g->tracks_per_cyl *
|
||||
(*znext_ptr)->zone.sec_per_track;
|
||||
znext_ptr = &((*znext_ptr)->next);
|
||||
num_cylinders = num_cylinders + x;
|
||||
} /* End of for-loop */
|
||||
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " revolution time %f", &tmp_f1) != 1) || tmp_f1 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid revolution time for disk %s\n",name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " 1 cylinder seek time %f", &tmp_f2 ) != 1) || tmp_f2 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid 1-cyl seek time for disk %s\n",name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " max stroke seek time %f", &tmp_f3) != 1) || tmp_f3 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid max seek time for disk %s\n",name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " average seek time %f", &tmp_f4) != 1) || tmp_f4 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid avg seek time for disk %s\n",name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " time to sleep %f", &tmp_f5) != 1) || tmp_f4 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid time to sleep for disk %s\n",name); exit(1);
|
||||
}
|
||||
lineno++;
|
||||
if ( (fscanf(fd, " time to spinup %f", &tmp_f6) != 1) || tmp_f4 <= 0) {
|
||||
fprintf(stderr,"Disk DB Error: Missing or invalid time to sleep for disk %s\n",name); exit(1);
|
||||
}
|
||||
strcpy( g->disk_name, name );
|
||||
g->revolution_time = tmp_f1;
|
||||
g->seek_one_cyl = tmp_f2;
|
||||
g->seek_max_stroke = tmp_f3;
|
||||
g->seek_avg = tmp_f4;
|
||||
g->time_to_sleep = tmp_f5;
|
||||
g->time_to_spinup = tmp_f6;
|
||||
/* convert disk specs to seek equation coeff */
|
||||
rf_DiskParam( num_cylinders, g->seek_one_cyl,
|
||||
g->seek_avg, g->seek_max_stroke,
|
||||
&g->seek_sqrt_coeff, &g->seek_linear_coeff,
|
||||
&g->seek_constant_coeff );
|
||||
}
|
||||
return( list );
|
||||
}
|
||||
|
||||
static void Format_disk(disk, sectors_per_block)
|
||||
RF_DiskState_t *disk;
|
||||
long sectors_per_block;
|
||||
{
|
||||
long sector_count = 0;
|
||||
RF_ZoneList_t *z;
|
||||
|
||||
if( disk == (RF_DiskState_t *) NULL ) RF_PANIC();
|
||||
if( disk->geom == (RF_Geometry_t *) NULL ) RF_PANIC();
|
||||
if( sectors_per_block <=0 ) RF_PANIC();
|
||||
|
||||
disk->sectors_per_block = sectors_per_block;
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop visits each disk zone and computes the total
|
||||
number of sectors on the disk. */
|
||||
while (z != (RF_ZoneList_t *) NULL) {
|
||||
sector_count = sector_count + (z->zone.num_cylinders *
|
||||
disk->geom->tracks_per_cyl *
|
||||
z->zone.sec_per_track);
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
disk->last_block_index = (sector_count / sectors_per_block) - 1;
|
||||
}
|
||||
|
||||
void rf_InitDisk( disk, disk_db, disk_name, init_cyl, init_track, init_offset, row, col)
|
||||
RF_DiskState_t *disk;
|
||||
char *disk_db;
|
||||
char *disk_name;
|
||||
long init_cyl;
|
||||
long init_track;
|
||||
RF_TICS_t init_offset;
|
||||
int row;
|
||||
int col;
|
||||
{
|
||||
RF_GeometryList_t *gp;
|
||||
FILE *f;
|
||||
|
||||
RF_ASSERT( disk != (RF_DiskState_t *) NULL );
|
||||
|
||||
disk->cur_cyl = init_cyl;
|
||||
disk->cur_track = init_track;
|
||||
disk->index_offset = init_offset;
|
||||
disk->geom = (RF_Geometry_t *) NULL;
|
||||
disk->queueFinishTime = 0.0;
|
||||
disk->lastBlock = 0;
|
||||
disk->row=row;
|
||||
disk->col=col;
|
||||
Zero_stats(disk);
|
||||
|
||||
if (strncmp(disk_name,"/dev",4 )==0) strcpy(disk_name,"HP2247");
|
||||
|
||||
if( geom_list == (RF_GeometryList_t *) NULL ) {
|
||||
f = fopen(disk_db,"r");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "ERROR: RAIDframe could not open disk db %s\n", disk_db);
|
||||
exit(1);
|
||||
}
|
||||
geom_list = Fetch_geometry_db( f );
|
||||
fclose( f );
|
||||
}
|
||||
for( gp = geom_list; gp != (RF_GeometryList_t *) NULL; gp = gp->next ) {
|
||||
RF_ASSERT( gp->disk != (RF_Geometry_t *) NULL
|
||||
&& gp->disk->disk_name != (char *) NULL );
|
||||
if( strncmp( disk_name, gp->disk->disk_name, RF_MAX_DISKNAME_LEN )
|
||||
== 0 ) {
|
||||
disk->geom = gp->disk;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( disk->geom == (RF_Geometry_t *) NULL ) {
|
||||
fprintf( stderr, "Disk %s not found in database %s\n",
|
||||
disk_name, disk_db );
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Format_disk( disk, 1 );
|
||||
}
|
||||
|
||||
static long Find_cyl( block, disk )
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
RF_ZoneList_t * z;
|
||||
long tmp;
|
||||
|
||||
long log_sector = block * disk->sectors_per_block;
|
||||
long cylinder = 0;
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which log_sector belongs,
|
||||
computes the starting cylinder number of this zone, and
|
||||
computes the sector offset into this zone. */
|
||||
while (log_sector >= z->zone.num_sectors) {
|
||||
log_sector = log_sector - z->zone.num_sectors;
|
||||
cylinder = cylinder + z->zone.num_cylinders;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
/* The cylinder to which log_sector belongs equals the starting
|
||||
cylinder number of its zone plus the cylinder offset into
|
||||
the zone. */
|
||||
tmp = cylinder + (log_sector / (z->zone.sec_per_track *
|
||||
disk->geom->tracks_per_cyl));
|
||||
|
||||
return( tmp );
|
||||
}
|
||||
|
||||
static long Find_track( block, disk )
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
RF_ZoneList_t * z;
|
||||
long tmp;
|
||||
|
||||
long log_sector = block * disk->sectors_per_block;
|
||||
long track = 0;
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which log_sector belongs,
|
||||
computes the starting track number of this zone, and computes
|
||||
the sector offset into this zone. */
|
||||
while (log_sector >= z->zone.num_sectors) {
|
||||
log_sector = log_sector - z->zone.num_sectors;
|
||||
track = track + (z->zone.num_cylinders *
|
||||
disk->geom->tracks_per_cyl);
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
/* The track to which log_sector belongs equals the starting
|
||||
track number of its zone plus the track offset into the zone,
|
||||
modulo the number of tracks per cylinder on the disk. */
|
||||
tmp = (track + (log_sector / z->zone.sec_per_track)) %
|
||||
disk->geom->tracks_per_cyl;
|
||||
|
||||
return( tmp );
|
||||
}
|
||||
|
||||
/*
|
||||
** The position of a logical sector relative to the index mark on any track
|
||||
** is not simple. A simple organization would be:
|
||||
**
|
||||
** track 0 : 0, 1, 2, 3, ... N-1
|
||||
** track 1 : N,N+1,N+2,N+3, ... 2N-1
|
||||
** ^
|
||||
** Index mark just before this point
|
||||
**
|
||||
** This is not good because sequential access of sectors N-1 then N
|
||||
** will require a full revolution in between (because track switch requires
|
||||
** a couple of sectors to recalibrate from embedded servo). So frequently
|
||||
** sequentially numbered sectors are physically skewed so that the next
|
||||
** accessible sector after N-1 will be N (with a skew of 2)
|
||||
**
|
||||
** track 0 : 0, 1, 2, 3, ... N-1
|
||||
** track 1 : 2N-2,2N-1, N, N+1, ... 2N-3
|
||||
** ^
|
||||
** Index mark just before this point
|
||||
**
|
||||
** Layout gets even more complex with cylinder boundaries. Seek time
|
||||
** is A + B*M where M is the number of cylinders to seek over. On a sequential
|
||||
** access that crosses a cylinder boundary, the disk will rotate for
|
||||
** A+B seconds, then "track skew" sectors (inter-sector gaps actually)
|
||||
** before it can access another sector, so the cylinder to cylinder skew
|
||||
** is "track skew" + CEIL( sectors_per_track*(A+B)/revolution_time ).
|
||||
**
|
||||
** So if sector 0 is 0 sectors from the index mark on the first track,
|
||||
** where is sector X relative to the index mark on its track?
|
||||
**
|
||||
** ( ( X % sectors_per_track ) basic relative position **
|
||||
** + track_skew * ( X / sectors_per_track ) skewed for each track **
|
||||
** + CEIL( sectors_per_track*(A+B)/revolution_time )
|
||||
** * ( X / sectors_per_cylinder ) skewed more for each cyl **
|
||||
** ) % sectors_per_track wrapped around in the track **
|
||||
**
|
||||
**
|
||||
*/
|
||||
|
||||
static long Find_phys_sector(block, disk)
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
long phys = 0;
|
||||
RF_ZoneList_t * z;
|
||||
long previous_spt = 1;
|
||||
long sector = block * disk->sectors_per_block;
|
||||
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which sector belongs,
|
||||
and computes the physical sector up to that zone. */
|
||||
while (sector >= z->zone.num_sectors) {
|
||||
sector = sector - z->zone.num_sectors;
|
||||
/* By first multiplying 'phys' by the sectors per track in
|
||||
the current zone divided by the sectors per track in the
|
||||
previous zone, we convert a given physical sector in one
|
||||
zone to an equivalent physical sector in another zone. */
|
||||
phys = ((phys * z->zone.sec_per_track / previous_spt) +
|
||||
(((z->zone.num_sectors - 1) % z->zone.sec_per_track) +
|
||||
(z->zone.track_skew * z->zone.num_cylinders *
|
||||
disk->geom->tracks_per_cyl) +
|
||||
(long) ceil( (double) z->zone.sec_per_track *
|
||||
(disk->geom->seek_constant_coeff) /
|
||||
disk->geom->revolution_time) *
|
||||
z->zone.num_cylinders)) %
|
||||
z->zone.sec_per_track;
|
||||
previous_spt = z->zone.sec_per_track;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
/* The final physical sector equals the physical sector up to
|
||||
the particular zone, plus the physical sector caused by the
|
||||
sector offset into this zone. */
|
||||
phys = ((phys * z->zone.sec_per_track / previous_spt) +
|
||||
((sector % z->zone.sec_per_track) +
|
||||
(z->zone.track_skew * (sector / z->zone.sec_per_track)) +
|
||||
(long) ceil( (RF_TICS_t) z->zone.sec_per_track *
|
||||
(disk->geom->seek_constant_coeff) /
|
||||
disk->geom->revolution_time) *
|
||||
(sector / (z->zone.sec_per_track *
|
||||
disk->geom->tracks_per_cyl)))) %
|
||||
z->zone.sec_per_track;
|
||||
|
||||
|
||||
return( phys );
|
||||
}
|
||||
|
||||
/*
|
||||
** When each disk starts up, its index mark is a fraction (f) of a rotation
|
||||
** ahead from its heads (in the direction of rotation). The sector
|
||||
** under its heads is at a fraction f of a rotation from the index
|
||||
** mark. After time T has passed, T/rotation_time revolutions have occurred, so
|
||||
** the sector under the heads is at a fraction FRAC(f+T/rotation_time) of a
|
||||
** rotation from the index mark. If the target block is at physical sector
|
||||
** X relative to its index mark, then it is at fraction (X/sectors_per_track),
|
||||
** so the rotational delay is
|
||||
** ((X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time
|
||||
** if this is positive, otherwise it is
|
||||
** (1+(X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time
|
||||
*/
|
||||
|
||||
#define FRAC(a) ( (a) - (long) floor(a) )
|
||||
|
||||
static RF_TICS_t Delay_to(cur_time, block, disk)
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
RF_TICS_t tmp;
|
||||
RF_ZoneList_t *z;
|
||||
|
||||
long sector = block * disk->sectors_per_block;
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which sector belongs. */
|
||||
while (sector >= z->zone.num_sectors) {
|
||||
sector = sector - z->zone.num_sectors;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
tmp = (
|
||||
(RF_TICS_t) Find_phys_sector(block,disk)/z->zone.sec_per_track
|
||||
- FRAC(disk->index_offset+cur_time/disk->geom->revolution_time)
|
||||
) * disk->geom->revolution_time;
|
||||
if( tmp < 0 ) tmp += disk->geom->revolution_time;
|
||||
if( tmp < 0 ) RF_PANIC();
|
||||
return( tmp );
|
||||
}
|
||||
|
||||
/* Hmmm...they seem to be computing the head switch time as
|
||||
* equal to the track skew penalty. Is this an approximation?
|
||||
* (MCH)
|
||||
*/
|
||||
static RF_TICS_t Seek_time( to_cyl, to_track, from_cyl, from_track, disk )
|
||||
long to_cyl;
|
||||
long to_track;
|
||||
long from_cyl;
|
||||
long from_track;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
long cyls = ABS_DIFF( from_cyl, to_cyl ) - 1;
|
||||
RF_TICS_t seek = 0.0;
|
||||
RF_ZoneList_t * z;
|
||||
|
||||
/* printf("Seek_time: from_cyl %ld, to_cyl %ld, from_trk %ld, to_trk %ld\n",from_cyl, to_cyl, from_track, to_track); */
|
||||
if( from_cyl != to_cyl ) {
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which to_cyl belongs. */
|
||||
while (to_cyl >= z->zone.num_cylinders) {
|
||||
to_cyl = to_cyl - z->zone.num_cylinders;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
seek = disk->geom->seek_constant_coeff
|
||||
+ disk->geom->seek_linear_coeff * cyls
|
||||
+ disk->geom->seek_sqrt_coeff * sqrt( (double) cyls )
|
||||
+ z->zone.track_skew * disk->geom->revolution_time /
|
||||
z->zone.sec_per_track;
|
||||
|
||||
} else if( from_track != to_track ) {
|
||||
/* from_track and to_track must lie in the same zone. */
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which from_cyl belongs. */
|
||||
while (from_cyl >= z->zone.num_cylinders) {
|
||||
from_cyl = from_cyl - z->zone.num_cylinders;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
seek = z->zone.track_skew
|
||||
* disk->geom->revolution_time
|
||||
/ z->zone.sec_per_track;
|
||||
}
|
||||
return( seek );
|
||||
}
|
||||
|
||||
static RF_TICS_t Seek(cur_time, block, disk, update)
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
long update;
|
||||
{
|
||||
long cur_cyl, cur_track;
|
||||
/*
|
||||
** current location is derived from the time,
|
||||
** current track and current cylinder
|
||||
**
|
||||
** update current location as you go
|
||||
*/
|
||||
|
||||
RF_ASSERT( block <= disk->last_block_index );
|
||||
cur_cyl = disk->cur_cyl;
|
||||
cur_track = disk->cur_track;
|
||||
if (update) {
|
||||
disk->cur_cyl = Find_cyl( block, disk );
|
||||
disk->cur_track = Find_track( block, disk );
|
||||
}
|
||||
return( Seek_time( disk->cur_cyl, disk->cur_track,
|
||||
cur_cyl, cur_track, disk ) );
|
||||
}
|
||||
|
||||
static RF_TICS_t Rotate(cur_time, block, disk, update)
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
long update;
|
||||
{
|
||||
/*
|
||||
** current location is derived from the time,
|
||||
** current track and current cylinder
|
||||
**
|
||||
** block the process until at the appropriate block
|
||||
** updating current location as you go
|
||||
*/
|
||||
|
||||
RF_ASSERT( block <= disk->last_block_index );
|
||||
return( Delay_to( cur_time, block, disk ) );
|
||||
}
|
||||
|
||||
static RF_TICS_t Seek_Rotate(cur_time, block, disk, update)
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_DiskState_t *disk;
|
||||
long update;
|
||||
{
|
||||
RF_TICS_t seek, delay;
|
||||
|
||||
RF_ASSERT( block <= disk->last_block_index );
|
||||
seek = Seek( cur_time, block, disk, update );
|
||||
delay = seek + Rotate( cur_time+seek, block, disk, update );
|
||||
return( delay );
|
||||
}
|
||||
|
||||
static RF_TICS_t GAP(sec_per_track, disk)
|
||||
long sec_per_track;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
RF_TICS_t tmp = (disk->geom->revolution_time/(100*sec_per_track));
|
||||
return (tmp);
|
||||
}
|
||||
|
||||
RF_TICS_t Block_access_time(cur_time, block, numblocks, disk, update)
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_SectorCount_t numblocks;
|
||||
RF_DiskState_t *disk;
|
||||
long update;
|
||||
{
|
||||
RF_TICS_t delay = 0;
|
||||
long cur = block, end = block + numblocks;
|
||||
long sector, tmp;
|
||||
RF_ZoneList_t * z;
|
||||
/*
|
||||
** this is the same as Seek_Rotate by merit of the mapping
|
||||
** except that the access ends before the gap to the next block
|
||||
*/
|
||||
RF_ASSERT( numblocks > 0 && end-1 <= disk->last_block_index );
|
||||
|
||||
while( cur < end ) {
|
||||
sector = cur * disk->sectors_per_block;
|
||||
z = disk->geom->zbr_data;
|
||||
/* This while-loop finds the zone to which sector belongs. */
|
||||
while (sector >= z->zone.num_sectors) {
|
||||
sector = sector - z->zone.num_sectors;
|
||||
z = z->next;
|
||||
}
|
||||
|
||||
tmp = RF_MIN( end - cur, z->zone.sec_per_track
|
||||
- cur % z->zone.sec_per_track );
|
||||
delay += tmp * disk->geom->revolution_time /
|
||||
z->zone.sec_per_track -
|
||||
GAP(z->zone.sec_per_track, disk);
|
||||
cur += tmp;
|
||||
if( cur != end )
|
||||
delay += Seek_Rotate( cur_time+delay, cur, disk, update );
|
||||
}
|
||||
return( delay );
|
||||
}
|
||||
|
||||
static void Zero_stats(disk)
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
char traceFileName[64];
|
||||
disk->stats.num_events = 0;
|
||||
disk->stats.seek_sum = 0;
|
||||
disk->stats.seekSq_sum = 0;
|
||||
disk->stats.rotate_sum = 0;
|
||||
disk->stats.rotateSq_sum = 0;
|
||||
disk->stats.transfer_sum = 0;
|
||||
disk->stats.transferSq_sum = 0;
|
||||
disk->stats.access_sum = 0;
|
||||
disk->stats.accessSq_sum = 0;
|
||||
disk->stats.sleep_sum=0;
|
||||
disk->stats.idle_sum=0;
|
||||
disk->stats.rw_sum=0;
|
||||
disk->stats.spinup_sum=0;
|
||||
disk->stats.last_acc=0;
|
||||
if (rf_diskTrace){
|
||||
sprintf (traceFileName,"rf_diskTracer%dc%d\0",disk->row,disk->col);
|
||||
if ( (disk->traceFile= fopen(traceFileName, "w")) == NULL) {
|
||||
perror(traceFileName); RF_PANIC();}
|
||||
}
|
||||
}
|
||||
|
||||
static RF_TICS_t Update_stats(cur_time, seek, rotate, transfer, disk)
|
||||
RF_TICS_t cur_time;
|
||||
RF_TICS_t seek;
|
||||
RF_TICS_t rotate;
|
||||
RF_TICS_t transfer;
|
||||
RF_DiskState_t *disk;
|
||||
{
|
||||
RF_TICS_t spinup=0;
|
||||
RF_TICS_t sleep=0;
|
||||
RF_TICS_t idle=0;
|
||||
|
||||
disk->stats.num_events++;
|
||||
disk->stats.seek_sum += seek;
|
||||
disk->stats.seekSq_sum += seek*seek;
|
||||
disk->stats.rotate_sum += rotate;
|
||||
disk->stats.rotateSq_sum += rotate*rotate;
|
||||
disk->stats.transfer_sum += transfer;
|
||||
disk->stats.transferSq_sum += transfer*transfer;
|
||||
disk->stats.access_sum += seek+rotate+transfer;
|
||||
disk->stats.accessSq_sum +=
|
||||
(seek+rotate+transfer)*(seek+rotate+transfer);
|
||||
|
||||
/* ASSERT (cur_time - disk->stats.last_acc >= 0); */
|
||||
|
||||
if (cur_time-disk->stats.last_acc>disk->geom->time_to_sleep){
|
||||
idle=disk->geom->time_to_sleep;
|
||||
|
||||
sleep = cur_time - disk->stats.last_acc - idle;
|
||||
spinup=disk->geom->time_to_spinup;
|
||||
rf_globalSpinup = spinup;
|
||||
}
|
||||
|
||||
else{
|
||||
idle=cur_time - disk->stats.last_acc;
|
||||
}
|
||||
|
||||
|
||||
disk->stats.sleep_sum+=sleep;
|
||||
disk->stats.idle_sum+=idle;
|
||||
disk->stats.rw_sum+=seek+rotate+transfer;
|
||||
disk->stats.spinup_sum+=spinup;
|
||||
|
||||
if (rf_diskTrace){
|
||||
fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),2.0);
|
||||
if (sleep){
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),1.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle+sleep),1.0);
|
||||
}
|
||||
|
||||
if (spinup){
|
||||
fprintf(disk->traceFile,"%g %g\n",(cur_time),4.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup),4.0);
|
||||
}
|
||||
|
||||
fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup),3.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(cur_time+spinup+seek+rotate+transfer),3.0);
|
||||
|
||||
|
||||
}
|
||||
|
||||
disk->stats.last_acc=cur_time+spinup+seek+rotate+transfer;
|
||||
|
||||
return(spinup);
|
||||
}
|
||||
|
||||
|
||||
void rf_StopStats(disk, cur_time)
|
||||
RF_DiskState_t *disk;
|
||||
RF_TICS_t cur_time;
|
||||
{
|
||||
|
||||
RF_TICS_t sleep=0;
|
||||
RF_TICS_t idle=0;
|
||||
|
||||
if (cur_time - disk->stats.last_acc > disk->geom->time_to_sleep){
|
||||
|
||||
sleep = cur_time - disk->stats.last_acc-disk->geom->time_to_sleep;
|
||||
idle = disk->geom->time_to_sleep;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
else{
|
||||
idle=cur_time - disk->stats.last_acc;
|
||||
}
|
||||
|
||||
disk->stats.sleep_sum+=sleep;
|
||||
disk->stats.idle_sum+=idle;
|
||||
|
||||
if (rf_diskTrace){
|
||||
fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),2.0);
|
||||
if (sleep){
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle),1.0);
|
||||
fprintf(disk->traceFile,"%g %g\n",(disk->stats.last_acc+idle+sleep),1.0);
|
||||
}
|
||||
fclose(disk->traceFile);
|
||||
}
|
||||
}
|
||||
|
||||
/* Sometimes num_events is zero because the disk was failed at the start
|
||||
* of the simulation and never replaced. This causes a crash on some
|
||||
* architectures, which is why we have the conditional.
|
||||
*/
|
||||
void rf_Report_stats(
|
||||
RF_DiskState_t *disk,
|
||||
long *numEventsPtr,
|
||||
RF_TICS_t *avgSeekPtr,
|
||||
RF_TICS_t *avgRotatePtr,
|
||||
RF_TICS_t *avgTransferPtr,
|
||||
RF_TICS_t *avgAccessPtr,
|
||||
RF_TICS_t *SleepPtr,
|
||||
RF_TICS_t *IdlePtr,
|
||||
RF_TICS_t *RwPtr,
|
||||
RF_TICS_t *SpinupPtr)
|
||||
{
|
||||
*numEventsPtr = disk->stats.num_events;
|
||||
if (disk->stats.num_events) {
|
||||
*avgSeekPtr = disk->stats.seek_sum / disk->stats.num_events;
|
||||
*avgRotatePtr = disk->stats.rotate_sum / disk->stats.num_events;
|
||||
*avgTransferPtr = disk->stats.transfer_sum / disk->stats.num_events;
|
||||
*avgAccessPtr = disk->stats.access_sum / disk->stats.num_events;
|
||||
} else {
|
||||
*avgSeekPtr = 0;
|
||||
*avgRotatePtr = 0;
|
||||
*avgTransferPtr = 0;
|
||||
*avgAccessPtr = 0;
|
||||
}
|
||||
*SleepPtr = disk->stats.sleep_sum;
|
||||
*IdlePtr = disk->stats.idle_sum;
|
||||
*RwPtr = disk->stats.rw_sum ;
|
||||
*SpinupPtr = disk->stats.spinup_sum ;
|
||||
}
|
||||
|
||||
int rf_Access_time( access_time, cur_time, block, numblocks, disk, media_done_time, update )
|
||||
RF_TICS_t *access_time;
|
||||
RF_TICS_t cur_time;
|
||||
RF_SectorNum_t block;
|
||||
RF_SectorCount_t numblocks;
|
||||
RF_DiskState_t *disk;
|
||||
RF_TICS_t *media_done_time;
|
||||
long update; /* 1 => update disk state, 0 => don't */
|
||||
{
|
||||
/*
|
||||
* first move to the start of the data, then sweep to the end
|
||||
*/
|
||||
RF_TICS_t spinup=0;
|
||||
RF_TICS_t seek = Seek( cur_time, block, disk, update );
|
||||
RF_TICS_t rotate = Rotate( cur_time+seek, block, disk, update );
|
||||
RF_TICS_t transfer = Block_access_time( cur_time+seek+rotate, block,
|
||||
numblocks, disk, update );
|
||||
|
||||
if (update) spinup=Update_stats(cur_time, seek, rotate, transfer, disk );
|
||||
*media_done_time = seek+rotate+transfer;
|
||||
*access_time =( seek+rotate+transfer+spinup);
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* added to take into account the fact that maping code acounts for the disk label */
|
||||
|
||||
void rf_GeometryDoReadCapacity(disk, numBlocks, blockSize)
|
||||
RF_DiskState_t *disk;
|
||||
RF_SectorCount_t *numBlocks;
|
||||
int *blockSize;
|
||||
{
|
||||
*numBlocks= (disk->last_block_index + 1 )-rf_protectedSectors;
|
||||
|
||||
*blockSize= (disk->sectors_per_block*512 );
|
||||
|
||||
/* in bytes */
|
||||
}
|
||||
|
||||
|
||||
/* END GEOMETRY ROUTINES **********************************************/
|
||||
|
||||
|
||||
static void rf_DiskParam(numCyls, minSeek, avgSeek, maxSeek, a, b, c)
|
||||
long numCyls;
|
||||
RF_TICS_t minSeek;
|
||||
RF_TICS_t avgSeek;
|
||||
RF_TICS_t maxSeek;
|
||||
RF_TICS_t *a;
|
||||
RF_TICS_t *b;
|
||||
RF_TICS_t *c;
|
||||
{
|
||||
if (minSeek == avgSeek && minSeek == maxSeek) {
|
||||
*a = 0.0; *b = 0.0; *c = minSeek;
|
||||
} else {
|
||||
*a = ( 15 * avgSeek - 10 * minSeek - 5 * maxSeek ) / ( 3 * sqrt( (double) numCyls ));
|
||||
*b = ( 7 * minSeek + 8 * maxSeek - 15 * avgSeek ) / ( 3 * numCyls );
|
||||
*c = minSeek;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
/* $NetBSD: rf_geometry.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/* geometry.h
|
||||
* code from raidSim to model disk behavior
|
||||
*/
|
||||
/*
|
||||
* Changes:
|
||||
* 8/18/92 Additional structures have been declared and existing
|
||||
* structures have been modified in order to support zone-
|
||||
* bit recording.
|
||||
* (AS) 1. The types 'Zone_data' and 'Zone_list' have been defined.
|
||||
* (AS) 2. The type 'Geometry' has been modified.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_geometry.h,v
|
||||
* Revision 1.10 1996/08/06 22:25:08 jimz
|
||||
* include raidframe stuff before system stuff
|
||||
*
|
||||
* Revision 1.9 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.8 1996/05/31 10:16:14 jimz
|
||||
* add raidsim note
|
||||
*
|
||||
* Revision 1.7 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.6 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.5 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.4 1995/12/01 18:29:45 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_GEOMETRY_H_
|
||||
#define _RF__RF_GEOMETRY_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_sys.h"
|
||||
#ifndef _KERNEL
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#ifdef __NetBSD__
|
||||
#include <stdio.h>
|
||||
#endif /* __NetBSD__ */
|
||||
#endif
|
||||
|
||||
#define RF_MAX_DISKNAME_LEN 80
|
||||
|
||||
typedef struct RF_ZoneData_s {
|
||||
long num_cylinders; /* Number of cylinders in zone */
|
||||
long sec_per_track; /* Sectors per track in zone */
|
||||
long track_skew; /* Skew of each track in zone */
|
||||
long num_sectors; /* Number of sectors in zone */
|
||||
} RF_ZoneData_t;
|
||||
|
||||
/*
|
||||
* Linked list containing zone data
|
||||
*/
|
||||
typedef struct RF_ZoneList_s RF_ZoneList_t;
|
||||
struct RF_ZoneList_s {
|
||||
RF_ZoneData_t zone; /* for each disk */
|
||||
RF_ZoneList_t *next;
|
||||
};
|
||||
|
||||
typedef struct RF_Geometry_s {
|
||||
char disk_name[RF_MAX_DISKNAME_LEN]; /* name for a type of disk */
|
||||
long tracks_per_cyl; /* tracks in a cylinder */
|
||||
/* assume 1 head per track, 1 set of read/write electronics */
|
||||
long num_zones; /* number of ZBR zones on disk */
|
||||
RF_TICS_t revolution_time; /* milliseconds per revolution */
|
||||
RF_TICS_t seek_one_cyl; /* adjacent cylinder seek time */
|
||||
RF_TICS_t seek_max_stroke; /* end to end seek time */
|
||||
RF_TICS_t seek_avg; /* random from/to average time */
|
||||
/*
|
||||
* seek time = a * (x-1)^0.5 + b * (x-1) + c
|
||||
* x >= 1 is the seek distance in cylinders
|
||||
*/
|
||||
RF_TICS_t seek_sqrt_coeff; /* a */
|
||||
RF_TICS_t seek_linear_coeff; /* b */
|
||||
RF_TICS_t seek_constant_coeff; /* c */
|
||||
RF_ZoneList_t *zbr_data; /* linked list with ZBR data */
|
||||
RF_TICS_t time_to_sleep; /* seconds of idle time before disks goes to sleep */
|
||||
RF_TICS_t time_to_spinup; /* seconds spin up takes */
|
||||
} RF_Geometry_t;
|
||||
|
||||
typedef struct RF_GeometryList_s RF_GeometryList_t;
|
||||
struct RF_GeometryList_s {
|
||||
RF_Geometry_t *disk;
|
||||
RF_GeometryList_t *next;
|
||||
};
|
||||
|
||||
typedef struct RF_DiskStats_s {
|
||||
long num_events;
|
||||
RF_TICS_t seek_sum;
|
||||
RF_TICS_t seekSq_sum;
|
||||
RF_TICS_t rotate_sum;
|
||||
RF_TICS_t rotateSq_sum;
|
||||
RF_TICS_t transfer_sum;
|
||||
RF_TICS_t transferSq_sum;
|
||||
RF_TICS_t access_sum;
|
||||
RF_TICS_t accessSq_sum;
|
||||
RF_TICS_t sleep_sum;
|
||||
RF_TICS_t idle_sum;
|
||||
RF_TICS_t rw_sum;
|
||||
RF_TICS_t spinup_sum;
|
||||
RF_TICS_t last_acc; /* time the last acces was finished */
|
||||
} RF_DiskStats_t;
|
||||
|
||||
struct RF_DiskState_s {
|
||||
int row;
|
||||
int col;
|
||||
RF_Geometry_t *geom;
|
||||
long sectors_per_block; /* formatted per disk */
|
||||
long last_block_index; /* format result for convenience */
|
||||
RF_TICS_t index_offset; /* powerup head offset to index mark */
|
||||
long cur_track; /* current track */
|
||||
long cur_cyl; /* current cylinder */
|
||||
RF_DiskStats_t stats; /* disk statistics */
|
||||
|
||||
RF_TICS_t queueFinishTime; /* used by shortest-seek code */
|
||||
long lastBlock;
|
||||
FILE *traceFile;
|
||||
};
|
||||
typedef struct RF_DiskState_s RF_DiskState_t;
|
||||
|
||||
extern RF_TICS_t rf_globalSpinup;
|
||||
|
||||
void rf_InitDisk(RF_DiskState_t *disk, char *disk_name, char *disk_db, long init_cyl,
|
||||
long init_track, RF_TICS_t init_offset, int row, int col);
|
||||
void rf_StopStats(RF_DiskState_t *disk, RF_TICS_t cur_time);
|
||||
void rf_Report_stats(RF_DiskState_t *disk, long *numEventsPtr, RF_TICS_t *avgSeekPtr,
|
||||
RF_TICS_t *avgRotatePtr, RF_TICS_t *avgTransferPtr, RF_TICS_t *avgAccessPtr,
|
||||
RF_TICS_t *SleepPtr, RF_TICS_t *IdlePtr, RF_TICS_t *RwPtr, RF_TICS_t *SpinupPtr);
|
||||
int rf_Access_time(RF_TICS_t *access_time, RF_TICS_t cur_time,
|
||||
RF_SectorNum_t block, RF_SectorCount_t numblocks, RF_DiskState_t *disk,
|
||||
RF_TICS_t *media_done_time, long update);
|
||||
void rf_GeometryDoReadCapacity(RF_DiskState_t *disk, RF_SectorCount_t *numBlocks,
|
||||
int *blockSize);
|
||||
|
||||
#endif /* !_RF__RF_GEOMETRY_H_ */
|
|
@ -0,0 +1,273 @@
|
|||
/* $NetBSD: rf_heap.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* We manage a heap of data,key pairs, where the key is a simple data type
|
||||
* and the data is any singular data type. We allow the caller to add
|
||||
* pairs, remove pairs, peek at the top pair, and do delete/add combinations.
|
||||
* The latter are efficient because we only reheap once.
|
||||
*
|
||||
* David Kotz 1990? and 1993
|
||||
*
|
||||
* Modify the heap to work with events, with the smallest time on the top.
|
||||
* Song Bac Toh, 1994
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_heap.c,v
|
||||
* Revision 1.8 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.7 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.6 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.5 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.4 1995/12/01 19:03:58 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_heap.h"
|
||||
#include "rf_general.h"
|
||||
|
||||
/*
 * Evaluates to nonzero if the two requests in the heap match, i.e. both
 * their disk and req_code members compare equal.  Each argument is a
 * pointer expression; arguments are parenthesized so that expressions such
 * as &req or p + 1 can be passed safely.
 *
 * NOTE(review): RF_HeapData_s as declared in rf_heap.h has no req_code
 * member, so this macro cannot be applied to RF_HeapData_t pointers as-is.
 */
#define Matching_REQUESTS(HeapData1, HeapData2) \
    (((HeapData1)->disk == (HeapData2)->disk) && \
     ((HeapData1)->req_code == (HeapData2)->req_code))
|
||||
|
||||
/* getting around in the heap */
|
||||
/* we don't use the 0th element of the array */
|
||||
#define ROOT 1
|
||||
#define LCHILD(p) (2 * (p))
|
||||
#define RCHILD(p) (2 * (p) + 1)
|
||||
#define PARENT(c) ((c) / 2)
|
||||
|
||||
/* @SUBTITLE "Debugging macros" */
|
||||
/* The following are used for debugging our callers
|
||||
* as well as internal stuff
|
||||
*/
|
||||
|
||||
#define CHECK_INVARIANTS 1

#ifdef CHECK_INVARIANTS
/*
 * INVARIANT2(x, y): if condition x is false, report the source file and
 * line on stderr, print the message string y, and exit(1).
 *
 * The body is wrapped in do { } while (0) so that "INVARIANT2(...);" is a
 * single statement and can be used safely as the body of an if/else.
 * The message is written with fputs() so it is never interpreted as a
 * printf format string.
 */
#define INVARIANT2(x, y) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \
                    __FILE__, __LINE__); \
            fputs((y), stderr); \
            exit(1); \
        } \
    } while (0)

/*
#define INVARIANT3(x, y, z) \
{ \
  if (!(x)) { \
    fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \
            __FILE__, __LINE__); \
    fprintf(stderr, (y), (z)); \
    exit(1); \
  } \
}
*/
#else /* CHECK_INVARIANTS */
/* #define INVARIANT2(x, y) */
/* #define INVARIANT3(x, y, z) already defined in modularize.h */
#endif /* CHECK_INVARIANTS */
|
||||
|
||||
/**** Rachad, must add to general debug structure */
|
||||
|
||||
|
||||
/* @SUBTITLE "InitHeap: Allocate a new heap" */
|
||||
/* might return NULL if no free memory */
|
||||
RF_Heap_t rf_InitHeap(int maxsize)
|
||||
{
|
||||
RF_Heap_t hp;
|
||||
|
||||
RF_ASSERT(maxsize > 0);
|
||||
RF_Malloc(hp, sizeof(struct RF_Heap_s),(RF_Heap_t));
|
||||
if (hp == NULL) {
|
||||
fprintf(stderr, "InitHeap: No memory for heap\n");
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
RF_Malloc(hp->heap,sizeof(RF_HeapEntry_t)*(maxsize+1),(RF_HeapEntry_t *));
|
||||
if (hp->heap == NULL) {
|
||||
fprintf(stderr, "InitHeap: No memory for heap of %d elements\n",
|
||||
maxsize);
|
||||
RF_Free(hp,-1); /* -1 means don't cause an error if the size does not match */
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
hp->numheap = 0;
|
||||
hp->maxsize = maxsize;
|
||||
|
||||
return(hp);
|
||||
}
|
||||
|
||||
/* @SUBTITLE "FreeHeap: delete a heap" */
|
||||
void rf_FreeHeap(RF_Heap_t hp)
|
||||
{
|
||||
if (hp != NULL) {
|
||||
RF_Free(hp->heap,sizeof(RF_HeapEntry_t)*(hp->maxsize+1));
|
||||
RF_Free(hp,sizeof(struct RF_Heap_s));
|
||||
}
|
||||
}
|
||||
|
||||
/* @SUBTITLE "AddHeap: Add an element to the heap" */
|
||||
void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key)
|
||||
{
|
||||
int node;
|
||||
|
||||
INVARIANT2(hp != NULL, "AddHeap: NULL heap\n");
|
||||
INVARIANT2((hp->numheap < RF_HEAP_MAX), "AddHeap: Heap overflowed\n");
|
||||
|
||||
/* use new space end of heap */
|
||||
node = ++(hp->numheap);
|
||||
|
||||
/* and reheap */
|
||||
while (node != ROOT && hp->heap[PARENT(node)].key > key) {
|
||||
hp->heap[node] = hp->heap[PARENT(node)];
|
||||
node = PARENT(node);
|
||||
}
|
||||
|
||||
hp->heap[node].data = data;
|
||||
hp->heap[node].key = key;
|
||||
}
|
||||
|
||||
/* @SUBTITLE "TopHeap: Return top element of heap" */
|
||||
int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key)
|
||||
{
|
||||
INVARIANT2(hp != NULL, "TopHeap: NULL heap\n");
|
||||
|
||||
if (hp->numheap > 0) {
|
||||
if (data)
|
||||
*data = hp->heap[ROOT].data;
|
||||
if (key)
|
||||
*key = hp->heap[ROOT].key;
|
||||
return(RF_HEAP_FOUND);
|
||||
}
|
||||
else {
|
||||
return(RF_HEAP_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
/* @SUBTITLE "RepHeap: Replace top of heap with given element and reheap" */
|
||||
/* note that hp->numheap does not change, and should already be > 0 */
|
||||
void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key)
|
||||
{
|
||||
int node; /* node in heap */
|
||||
int lchild, rchild; /* left and right children of node */
|
||||
int left, right; /* left and right children exist? */
|
||||
int swapped; /* swap was made? */
|
||||
RF_HeapEntry_t *heap; /* pointer to the base of this heap array */
|
||||
|
||||
INVARIANT2(hp != NULL, "RepHeap: NULL heap\n");
|
||||
|
||||
/* If heap is empty just add this element */
|
||||
/* if used properly this case should never come up */
|
||||
if (hp->numheap == 0) {
|
||||
rf_AddHeap(hp, data, key);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
heap = hp->heap; /* cache the heap base pointer */
|
||||
|
||||
node = ROOT;
|
||||
|
||||
do {
|
||||
lchild = LCHILD(node);
|
||||
rchild = RCHILD(node);
|
||||
left = (lchild <= hp->numheap);
|
||||
right = (rchild <= hp->numheap);
|
||||
|
||||
/* Both children exist: which is smaller? */
|
||||
if (left && right)
|
||||
if (heap[lchild].key < heap[rchild].key)
|
||||
right = RF_HEAP_NONE;
|
||||
else
|
||||
left = RF_HEAP_NONE;
|
||||
|
||||
/* Now only one of left and right is true. compare it with us */
|
||||
if (left && heap[lchild].key < key) {
|
||||
/* swap with left child */
|
||||
heap[node] = heap[lchild];
|
||||
node = lchild;
|
||||
swapped = RF_HEAP_FOUND;
|
||||
} else if (right && heap[rchild].key < key) {
|
||||
/* swap with right child */
|
||||
heap[node] = heap[rchild];
|
||||
node = rchild;
|
||||
swapped = RF_HEAP_FOUND;
|
||||
} else
|
||||
swapped = RF_HEAP_NONE;
|
||||
} while (swapped);
|
||||
|
||||
/* final resting place for new element */
|
||||
heap[node].key = key;
|
||||
heap[node].data = data;
|
||||
}
|
||||
|
||||
/* @SUBTITLE "RemHeap: Remove top element and reheap" */
|
||||
int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key)
|
||||
{
|
||||
int node;
|
||||
|
||||
/* we don't check hp's validity because TopHeap will do it for us */
|
||||
|
||||
/* get the top element into data and key, if any */
|
||||
if (rf_TopHeap(hp, data, key)) {
|
||||
/* there was something there, so replace top with last element */
|
||||
node = hp->numheap--;
|
||||
if (hp->numheap > 0)
|
||||
rf_RepHeap(hp, hp->heap[node].data, hp->heap[node].key);
|
||||
|
||||
return(RF_HEAP_FOUND);
|
||||
} else{
|
||||
return(RF_HEAP_NONE);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/* $NetBSD: rf_heap.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* @TITLE "heap.h - interface to heap management implementation */
|
||||
/* We manage a heap of data,key pairs, where the key could be any
|
||||
* simple data type
|
||||
* and the data is any pointer data type. We allow the caller to add
|
||||
* pairs, remove pairs, peek at the top pair, and do delete/add combinations.
|
||||
* The latter are efficient because we only reheap once.
|
||||
*
|
||||
* David Kotz 1990? and 1993
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_heap.h,v
|
||||
* Revision 1.8 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.7 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.6 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.5 1995/12/01 19:04:07 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_HEAP_H_
|
||||
#define _RF__RF_HEAP_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_desc.h"
|
||||
|
||||
#define RF_HEAP_MAX 10240
|
||||
|
||||
#define RF_HEAP_FOUND 1
|
||||
#define RF_HEAP_NONE 0
|
||||
|
||||
typedef RF_TICS_t RF_HeapKey_t;
|
||||
|
||||
typedef struct RF_HeapData_s RF_HeapData_t;
|
||||
typedef struct RF_Heap_s *RF_Heap_t;
|
||||
typedef struct RF_HeapEntry_s RF_HeapEntry_t;
|
||||
|
||||
/* heap data */
|
||||
struct RF_HeapData_s {
|
||||
RF_TICS_t eventTime;
|
||||
int disk;
|
||||
int (*CompleteFunc)(); /* function to be called upon completion */
|
||||
void *argument; /* argument to be passed to CompleteFunc */
|
||||
int owner; /* which task is resposable for this request */
|
||||
int row;
|
||||
int col; /* coordinates of disk */
|
||||
RF_Raid_t *raidPtr;
|
||||
void *diskid;
|
||||
/* Dag event */
|
||||
RF_RaidAccessDesc_t *desc;
|
||||
};
|
||||
|
||||
struct RF_HeapEntry_s {
|
||||
RF_HeapData_t *data; /* the arbitrary data */
|
||||
RF_HeapKey_t key; /* key for comparison */
|
||||
};
|
||||
|
||||
struct RF_Heap_s {
|
||||
RF_HeapEntry_t *heap; /* the heap in use (an array) */
|
||||
int numheap; /* number of elements in heap */
|
||||
int maxsize;
|
||||
};
|
||||
|
||||
/* set up heap to hold maxsize nodes */
|
||||
RF_Heap_t rf_InitHeap(int maxsize);
|
||||
|
||||
/* delete a heap data structure */
|
||||
void rf_FreeHeap(RF_Heap_t hp);
|
||||
|
||||
/* add the element to the heap */
|
||||
void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key);
|
||||
|
||||
/* return top of the heap, without removing it from heap (FALSE if empty) */
|
||||
int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key);
|
||||
|
||||
/* replace the heap's top item with a new item, and reheap */
|
||||
void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key);
|
||||
|
||||
/* remove the heap's top item, if any (FALSE if empty heap) */
|
||||
int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key);
|
||||
|
||||
#endif /* !_RF__RF_HEAP_H_ */
|
|
@ -0,0 +1,72 @@
|
|||
/* $NetBSD: rf_hist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* rf_hist.h
|
||||
*
|
||||
* Histogram operations for RAIDframe stats
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/* :
|
||||
* Log: rf_hist.h,v
|
||||
* Revision 1.3 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.2 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.1 1996/05/31 10:33:05 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_HIST_H_
|
||||
#define _RF__RF_HIST_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
#define RF_HIST_RESOLUTION 5
|
||||
#define RF_HIST_MIN_VAL 0
|
||||
#define RF_HIST_MAX_VAL 1000
|
||||
#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL)
|
||||
#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1)
|
||||
|
||||
typedef RF_uint32 RF_Hist_t;
|
||||
|
||||
/*
 * RF_HIST_ADD(_hist_, _val_): count one sample in the histogram array
 * _hist_.
 *
 * _val_ is converted to RF_Hist_t and divided by 1000.  A result of
 * RF_HIST_MAX_VAL or more is counted in the last bucket
 * (RF_HIST_NUM_BUCKETS-1); anything else goes in bucket
 * (result - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION.
 *
 * The temporary is deliberately not called "val": with that name, a caller
 * writing RF_HIST_ADD(h, val) would have its own variable shadowed and the
 * macro would read an uninitialized value.
 */
#define RF_HIST_ADD(_hist_,_val_) { \
    RF_Hist_t rf_hist_tmp_; \
    rf_hist_tmp_ = ((RF_Hist_t)(_val_)) / 1000; \
    if (rf_hist_tmp_ >= RF_HIST_MAX_VAL) \
        (_hist_)[RF_HIST_NUM_BUCKETS-1]++; \
    else \
        (_hist_)[(rf_hist_tmp_ - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \
}
|
||||
|
||||
#endif /* !_RF__RF_HIST_H_ */
|
|
@ -0,0 +1,360 @@
|
|||
/* $NetBSD: rf_interdecluster.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/************************************************************
|
||||
*
|
||||
* rf_interdecluster.c -- implements interleaved declustering
|
||||
*
|
||||
************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_interdecluster.c,v
|
||||
* Revision 1.24 1996/08/02 13:20:38 jimz
|
||||
* get rid of bogus (long) casts
|
||||
*
|
||||
* Revision 1.23 1996/07/31 16:56:18 jimz
|
||||
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
|
||||
*
|
||||
* Revision 1.22 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.21 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.20 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.19 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.18 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.17 1996/06/11 15:17:55 wvcii
|
||||
* added include of rf_interdecluster.h
|
||||
* fixed parameter list of rf_ConfigureInterDecluster
|
||||
* fixed return type of rf_GetNumSparePUsInterDecluster
|
||||
* removed include of rf_raid1.h
|
||||
*
|
||||
* Revision 1.16 1996/06/11 08:55:15 jimz
|
||||
* improved error-checking at configuration time
|
||||
*
|
||||
* Revision 1.15 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.14 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.13 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.12 1996/06/06 18:41:48 jimz
|
||||
* add interleaved declustering dag selection
|
||||
*
|
||||
* Revision 1.11 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.10 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.9 1996/05/31 05:03:01 amiri
|
||||
* fixed a bug related to sparing layout.
|
||||
*
|
||||
* Revision 1.8 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.7 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.6 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.5 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.4 1996/05/03 19:50:38 wvcii
|
||||
* removed include of rf_redstripe.h
|
||||
* fixed change log parameters in header
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_interdecluster.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_utils.h"
|
||||
#include "rf_dagffrd.h"
|
||||
#include "rf_dagdegrd.h"
|
||||
#include "rf_dagffwr.h"
|
||||
#include "rf_dagdegwr.h"
|
||||
|
||||
typedef struct RF_InterdeclusterConfigInfo_s {
|
||||
RF_RowCol_t **stripeIdentifier; /* filled in at config time
|
||||
* and used by IdentifyStripe */
|
||||
RF_StripeCount_t numSparingRegions;
|
||||
RF_StripeCount_t stripeUnitsPerSparingRegion;
|
||||
RF_SectorNum_t mirrorStripeOffset;
|
||||
} RF_InterdeclusterConfigInfo_t;
|
||||
|
||||
int rf_ConfigureInterDecluster(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_StripeCount_t num_used_stripeUnitsPerDisk;
|
||||
RF_InterdeclusterConfigInfo_t *info;
|
||||
RF_RowCol_t i, tmp, SUs_per_region;
|
||||
|
||||
/* create an Interleaved Declustering configuration structure */
|
||||
RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *),
|
||||
raidPtr->cleanupList);
|
||||
if (info == NULL)
|
||||
return(ENOMEM);
|
||||
layoutPtr->layoutSpecificInfo = (void *) info;
|
||||
|
||||
/* fill in the config structure. */
|
||||
SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1);
|
||||
info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2 , raidPtr->cleanupList);
|
||||
if (info->stripeIdentifier == NULL)
|
||||
return(ENOMEM);
|
||||
for (i=0; i< SUs_per_region; i++) {
|
||||
info->stripeIdentifier[i][0] = i / (raidPtr->numCol-1);
|
||||
tmp = i / raidPtr->numCol;
|
||||
info->stripeIdentifier[i][1] = (i+1+tmp) % raidPtr->numCol;
|
||||
}
|
||||
|
||||
/* no spare tables */
|
||||
RF_ASSERT(raidPtr->numRow == 1);
|
||||
|
||||
/* fill in the remaining layout parameters */
|
||||
|
||||
/* total number of stripes should a multiple of 2*numCol: Each sparing region consists of
|
||||
2*numCol stripes: n-1 primary copy, n-1 secondary copy and 2 for spare .. */
|
||||
num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
|
||||
(2*raidPtr->numCol) );
|
||||
info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol);
|
||||
/* this is in fact the number of stripe units (that are primary data copies) in the sparing region */
|
||||
info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
|
||||
info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol+1);
|
||||
layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
|
||||
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
|
||||
layoutPtr->numDataCol = 1;
|
||||
layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
|
||||
layoutPtr->numParityCol = 1;
|
||||
|
||||
layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
|
||||
|
||||
raidPtr->sectorsPerDisk =
|
||||
num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
raidPtr->totalSectors =
|
||||
(layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * Default number of floating reconstruction buffers for this layout.
 * The value is a fixed constant; raidPtr is not consulted.
 */
int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr)
{
  enum { DEFAULT_FLOATING_RECON_BUFFERS = 30 };

  return(DEFAULT_FLOATING_RECON_BUFFERS);
}
|
||||
|
||||
/*
 * Default head separation limit for this layout: the array's
 * sectorsPerDisk value.
 */
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr)
{
  RF_HeadSepLimit_t limit = raidPtr->sectorsPerDisk;

  return(limit);
}
|
||||
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(
|
||||
RF_Raid_t *raidPtr)
|
||||
{
|
||||
RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
|
||||
return ( 2 * ((RF_ReconUnitCount_t) info->numSparingRegions) );
|
||||
/* the layout uses two stripe units per disk as spare within each sparing region */
|
||||
}
|
||||
|
||||
/* Maps to the primary copy of the data, i.e. the first mirror pair */
|
||||
void rf_MapSectorInterDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk;
|
||||
RF_StripeNum_t sparing_region_id, index_within_region;
|
||||
int col_before_remap;
|
||||
|
||||
*row = 0;
|
||||
sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
|
||||
index_within_region = SUID % info->stripeUnitsPerSparingRegion;
|
||||
su_offset_into_disk = index_within_region % (raidPtr->numCol-1);
|
||||
mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
|
||||
col_before_remap = index_within_region / (raidPtr->numCol-1);
|
||||
|
||||
if (!remap) {
|
||||
*col = col_before_remap;;
|
||||
*diskSector = ( su_offset_into_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) *
|
||||
raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
else {
|
||||
/* remap sector to spare space...*/
|
||||
*diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
*col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
|
||||
*col = (*col + 1) % raidPtr->numCol;
|
||||
if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol;
|
||||
}
|
||||
}
|
||||
|
||||
/* Maps to the second copy of the mirror pair. */
|
||||
void rf_MapParityInterDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row,
|
||||
RF_RowCol_t *col,
|
||||
RF_SectorNum_t *diskSector,
|
||||
int remap)
|
||||
{
|
||||
RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk;
|
||||
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
int col_before_remap;
|
||||
|
||||
sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
|
||||
index_within_region = SUID % info->stripeUnitsPerSparingRegion;
|
||||
mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
|
||||
col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
|
||||
|
||||
*row = 0;
|
||||
if (!remap) {
|
||||
*col = col_before_remap;
|
||||
*diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += sparing_region_id * (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
}
|
||||
else {
|
||||
/* remap parity to spare space ... */
|
||||
*diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
|
||||
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
|
||||
*col = index_within_region / (raidPtr->numCol-1);
|
||||
*col = (*col + 1) % raidPtr->numCol;
|
||||
if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_IdentifyStripeInterDecluster(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids,
|
||||
RF_RowCol_t *outRow)
|
||||
{
|
||||
RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
|
||||
RF_StripeNum_t SUID;
|
||||
|
||||
SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
|
||||
SUID = SUID % info->stripeUnitsPerSparingRegion;
|
||||
|
||||
*outRow = 0;
|
||||
*diskids = info->stripeIdentifier[ SUID ];
|
||||
}
|
||||
|
||||
void rf_MapSIDToPSIDInterDecluster(
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID,
|
||||
RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru)
|
||||
{
|
||||
*which_ru = 0;
|
||||
*psID = stripeID;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* select a graph to perform a single-stripe access
|
||||
*
|
||||
* Parameters: raidPtr - description of the physical array
|
||||
* type - type of operation (read or write) requested
|
||||
* asmap - logical & physical addresses for this access
|
||||
* createFunc - name of function to use to create the graph
|
||||
*****************************************************************************/
|
||||
|
||||
void rf_RAIDIDagSelect(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_VoidFuncPtr *createFunc)
|
||||
{
|
||||
RF_ASSERT(RF_IO_IS_R_OR_W(type));
|
||||
|
||||
if (asmap->numDataFailed + asmap->numParityFailed > 1) {
|
||||
RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
|
||||
*createFunc = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
*createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
|
||||
if (type == RF_IO_TYPE_READ) {
|
||||
if (asmap->numDataFailed == 0)
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG;
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG;
|
||||
}
|
||||
else
|
||||
*createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/* $NetBSD: rf_interdecluster.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Khalil Amiri
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_interdecluster.h
|
||||
* header file for Interleaved Declustering
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_interdecluster.h,v
|
||||
* Revision 1.13 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.12 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.11 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.10 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.9 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.8 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.7 1996/06/06 18:41:58 jimz
|
||||
* add RAIDIDagSelect
|
||||
*
|
||||
* Revision 1.6 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.5 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.4 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1995/12/01 19:07:25 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.1 1995/11/28 21:38:27 amiri
|
||||
* Initial revision
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_INTERDECLUSTER_H_
|
||||
#define _RF__RF_INTERDECLUSTER_H_
|
||||
|
||||
int rf_ConfigureInterDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr);
|
||||
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr);
|
||||
RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t *raidPtr);
|
||||
void rf_MapSectorInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_MapParityInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
void rf_IdentifyStripeInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
void rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru);
|
||||
void rf_RAIDIDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
|
||||
|
||||
#endif /* !_RF__RF_INTERDECLUSTER_H_ */
|
|
@ -0,0 +1,54 @@
|
|||
/* $NetBSD: rf_invertq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Daniel Stodolsky
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_invertq.c,v
|
||||
* Revision 1.5 1996/07/29 16:36:36 jimz
|
||||
* include rf_archs.h here, not rf_invertq.h, to avoid VPATH
|
||||
* problems in OSF/1 kernel
|
||||
*
|
||||
* Revision 1.4 1995/11/30 15:57:27 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#include "rf_archs.h"
|
||||
#include "rf_pqdeg.h"
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <raidframe/du_data/rf_invertq.h>
|
||||
#else
|
||||
#include "rf_invertq.h" /* XXX this is a hack. */
|
||||
#endif /* !__NetBSD__ */
|
||||
#else /* KERNEL */
|
||||
#include "rf_invertq.h"
|
||||
#endif /* KERNEL */
|
|
@ -0,0 +1,72 @@
|
|||
/* $NetBSD: rf_invertq.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* rf_invertq.h
|
||||
*/
|
||||
/*
|
||||
* This is normally a generated file. Not so for NetBSD.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_INVERTQ_H_
|
||||
#define _RF__RF_INVERTQ_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
/*
|
||||
* rf_geniq.c must include rf_archs.h before including
|
||||
* this file (to get VPATH magic right with the way we
|
||||
* generate this file in kernel trees)
|
||||
*/
|
||||
/* #include "rf_archs.h" */
|
||||
|
||||
#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
|
||||
|
||||
#define RF_Q_COLS 32
|
||||
RF_ua32_t rf_rn = {
|
||||
1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1, };
|
||||
/*
 * Row i, column j.  Column 1 of row i equals rf_rn[i], and rows 0 and 31
 * are the identity.
 * NOTE(review): this is consistent with rf_qfor[i][j] being j multiplied
 * by rf_rn[i] in GF(2^5) -- confirm against the generator.
 */
RF_ua32_t rf_qfor[32] = {
  /* i = 0 */  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
  /* i = 1 */  { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27 },
  /* i = 2 */  { 0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19 },
  /* i = 3 */  { 0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3 },
  /* i = 4 */  { 0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6 },
  /* i = 5 */  { 0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12 },
  /* i = 6 */  { 0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24 },
  /* i = 7 */  { 0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21 },
  /* i = 8 */  { 0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15 },
  /* i = 9 */  { 0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30 },
  /* i = 10 */ { 0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25 },
  /* i = 11 */ { 0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23 },
  /* i = 12 */ { 0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11 },
  /* i = 13 */ { 0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22 },
  /* i = 14 */ { 0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9 },
  /* i = 15 */ { 0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18 },
  /* i = 16 */ { 0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1 },
  /* i = 17 */ { 0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2 },
  /* i = 18 */ { 0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4 },
  /* i = 19 */ { 0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8 },
  /* i = 20 */ { 0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16 },
  /* i = 21 */ { 0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5 },
  /* i = 22 */ { 0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10 },
  /* i = 23 */ { 0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20 },
  /* i = 24 */ { 0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13 },
  /* i = 25 */ { 0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26 },
  /* i = 26 */ { 0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17 },
  /* i = 27 */ { 0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7 },
  /* i = 28 */ { 0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14 },
  /* i = 29 */ { 0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28 },
  /* i = 30 */ { 0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29 },
  /* i = 31 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
};
|
||||
#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]
|
||||
#ifdef KERNEL
|
||||
RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */
|
||||
#elif defined(NO_PQ)
|
||||
RF_ua1024_t rf_qinv[29*29];
|
||||
#else /* !KERNEL && !NO_PQ */
|
||||
|
||||
#endif /* !KERNEL && !NO_PQ */
|
||||
|
||||
#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
|
||||
#endif /* !_RF__RF_INVERTQ_H_ */
|
|
@ -0,0 +1,70 @@
|
|||
/* $NetBSD: rf_kintf.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* rf_kintf.h
|
||||
*
|
||||
* RAIDframe exported kernel interface
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_kintf.h,v
|
||||
* Revision 1.2 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.1 1996/05/31 18:59:14 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_KINTF_H_
#define _RF__RF_KINTF_H_

#include "rf_types.h"

/* RAIDframe exported kernel interface */
int     rf_boot(void);
int     rf_open(dev_t dev, int flag, int fmt);
int     rf_close(dev_t dev, int flag, int fmt);
void    rf_strategy(struct buf *bp);
void    rf_minphys(struct buf *bp);
int     rf_read(dev_t dev, struct uio *uio);
int     rf_write(dev_t dev, struct uio *uio);
int     rf_size(dev_t dev);
int     rf_ioctl(dev_t dev, int cmd, caddr_t data, int flag);
void    rf_ReconKernelThread(void);
int     rf_GetSpareTableFromDaemon(RF_SparetWait_t *req);
caddr_t rf_MapToKernelSpace(struct buf *bp, caddr_t addr);
int     rf_BzeroWithRemap(struct buf *bp, char *databuf, int len);
int     rf_DoAccessKernel(
          RF_Raid_t *raidPtr,
          struct buf *bp,
          RF_RaidAccessFlags_t flags,
          void (*cbFunc)(struct buf *),
          void *cbArg);
int     rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req);

#endif /* _RF__RF_KINTF_H_ */
|
|
@ -0,0 +1,719 @@
|
|||
/* $NetBSD: rf_layout.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_layout.c -- driver code dealing with layout and mapping issues
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_layout.c,v
|
||||
* Revision 1.71 1996/08/20 22:41:30 jimz
|
||||
* add declustered evenodd
|
||||
*
|
||||
* Revision 1.70 1996/07/31 16:56:18 jimz
|
||||
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
|
||||
*
|
||||
* Revision 1.69 1996/07/31 15:34:46 jimz
|
||||
* add EvenOdd
|
||||
*
|
||||
* Revision 1.68 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.67 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.66 1996/07/27 18:40:24 jimz
|
||||
* cleanup sweep
|
||||
*
|
||||
* Revision 1.65 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.64 1996/07/15 17:22:18 jimz
|
||||
* nit-pick code cleanup
|
||||
* resolve stdlib problems on DEC OSF
|
||||
*
|
||||
* Revision 1.63 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.62 1996/07/11 19:08:00 jimz
|
||||
* generalize reconstruction mechanism
|
||||
* allow raid1 reconstructs via copyback (done with array
|
||||
* quiesced, not online, therefore not disk-directed)
|
||||
*
|
||||
* Revision 1.61 1996/06/19 22:23:01 jimz
|
||||
* parity verification is now a layout-configurable thing
|
||||
* not all layouts currently support it (correctly, anyway)
|
||||
*
|
||||
* Revision 1.60 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.59 1996/06/19 14:57:58 jimz
|
||||
* move layout-specific config parsing hooks into RF_LayoutSW_t
|
||||
* table in rf_layout.c
|
||||
*
|
||||
* Revision 1.58 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.57 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.56 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.55 1996/06/06 18:41:35 jimz
|
||||
* change interleaved declustering dag selection to an
|
||||
* interleaved-declustering-specific routine (so we can
|
||||
* use the partitioned mirror node)
|
||||
*
|
||||
* Revision 1.54 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.53 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.52 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.51 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.50 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.49 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.48 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.47 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.46 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.45 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.44 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.43 1996/02/22 16:46:35 amiri
|
||||
* modified chained declustering to use a seperate DAG selection routine
|
||||
*
|
||||
* Revision 1.42 1995/12/01 19:16:11 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.41 1995/11/28 21:31:02 amiri
|
||||
* added Interleaved Declustering to switch table
|
||||
*
|
||||
* Revision 1.40 1995/11/20 14:35:17 arw
|
||||
* moved rf_StartThroughputStats in DefaultWrite and DefaultRead
|
||||
*
|
||||
* Revision 1.39 1995/11/19 16:28:46 wvcii
|
||||
* replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
|
||||
*
|
||||
* Revision 1.38 1995/11/17 19:00:41 wvcii
|
||||
* added MapQ entries to switch table
|
||||
*
|
||||
* Revision 1.37 1995/11/17 16:58:13 amiri
|
||||
* Added the Chained Declustering architecture ('C'),
|
||||
* essentially a variant of mirroring.
|
||||
*
|
||||
* Revision 1.36 1995/11/16 16:16:10 amiri
|
||||
* Added RAID5 with rotated sparing ('R' configuration)
|
||||
*
|
||||
* Revision 1.35 1995/11/07 15:41:17 wvcii
|
||||
* modified state lists: DefaultStates, VSReadStates
|
||||
* necessary to support new states (LaunchDAGState, ProcessDAGState)
|
||||
*
|
||||
* Revision 1.34 1995/10/18 01:23:20 amiri
|
||||
* added ifndef SIMULATE wrapper around rf_StartThroughputStats()
|
||||
*
|
||||
* Revision 1.33 1995/10/13 15:05:46 arw
|
||||
* added rf_StartThroughputStats to DefaultRead and DefaultWrite
|
||||
*
|
||||
* Revision 1.32 1995/10/12 16:04:23 jimz
|
||||
* added config names to mapsw entires
|
||||
*
|
||||
* Revision 1.31 1995/10/04 03:57:48 wvcii
|
||||
* added raid level 1 to mapsw
|
||||
*
|
||||
* Revision 1.30 1995/09/07 01:26:55 jimz
|
||||
* Achive basic compilation in kernel. Kernel functionality
|
||||
* is not guaranteed at all, but it'll compile. Mostly. I hope.
|
||||
*
|
||||
* Revision 1.29 1995/07/28 21:43:42 robby
|
||||
* checkin after leaving for Rice. Bye
|
||||
*
|
||||
* Revision 1.28 1995/07/26 03:26:14 robby
|
||||
* *** empty log message ***
|
||||
*
|
||||
* Revision 1.27 1995/07/21 19:47:52 rachad
|
||||
* Added raid 0 /5 with caching architectures
|
||||
*
|
||||
* Revision 1.26 1995/07/21 19:29:27 robby
|
||||
* added virtual striping states
|
||||
*
|
||||
* Revision 1.25 1995/07/10 21:41:47 robby
|
||||
* switched to have my own virtual stripng write function from the cache
|
||||
*
|
||||
* Revision 1.24 1995/07/10 20:51:59 robby
|
||||
* added virtual striping states
|
||||
*
|
||||
* Revision 1.23 1995/07/10 16:57:42 robby
|
||||
* updated alloclistelem struct to the correct struct name
|
||||
*
|
||||
* Revision 1.22 1995/07/08 20:06:11 rachad
|
||||
* *** empty log message ***
|
||||
*
|
||||
* Revision 1.21 1995/07/08 19:43:16 cfb
|
||||
* *** empty log message ***
|
||||
*
|
||||
* Revision 1.20 1995/07/08 18:05:39 rachad
|
||||
* Linked up Claudsons code with the real cache
|
||||
*
|
||||
* Revision 1.19 1995/07/06 14:29:36 robby
|
||||
* added defaults states list to the layout switch
|
||||
*
|
||||
* Revision 1.18 1995/06/23 13:40:34 robby
|
||||
* updeated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_archs.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_configure.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_desc.h"
|
||||
#include "rf_decluster.h"
|
||||
#include "rf_pq.h"
|
||||
#include "rf_declusterPQ.h"
|
||||
#include "rf_raid0.h"
|
||||
#include "rf_raid1.h"
|
||||
#include "rf_raid4.h"
|
||||
#include "rf_raid5.h"
|
||||
#include "rf_states.h"
|
||||
#if RF_INCLUDE_RAID5_RS > 0
|
||||
#include "rf_raid5_rotatedspare.h"
|
||||
#endif /* RF_INCLUDE_RAID5_RS > 0 */
|
||||
#if RF_INCLUDE_CHAINDECLUSTER > 0
|
||||
#include "rf_chaindecluster.h"
|
||||
#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
|
||||
#if RF_INCLUDE_INTERDECLUSTER > 0
|
||||
#include "rf_interdecluster.h"
|
||||
#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
|
||||
#if RF_INCLUDE_PARITYLOGGING > 0
|
||||
#include "rf_paritylogging.h"
|
||||
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
|
||||
#if RF_INCLUDE_EVENODD > 0
|
||||
#include "rf_evenodd.h"
|
||||
#endif /* RF_INCLUDE_EVENODD > 0 */
|
||||
#include "rf_general.h"
|
||||
#include "rf_driver.h"
|
||||
#include "rf_parityscan.h"
|
||||
#include "rf_reconbuffer.h"
|
||||
#include "rf_reconutil.h"
|
||||
|
||||
/***********************************************************************
|
||||
*
|
||||
* the layout switch defines all the layouts that are supported.
|
||||
* fields are: layout ID, init routine, shutdown routine, map
|
||||
* sector, map parity, identify stripe, dag selection, map stripeid
|
||||
* to parity stripe id (optional), num faults tolerated, special
|
||||
* flags.
|
||||
*
|
||||
***********************************************************************/
|
||||
|
||||
static RF_AccessState_t DefaultStates[] = {rf_QuiesceState,
|
||||
rf_IncrAccessesCountState, rf_MapState, rf_LockState, rf_CreateDAGState,
|
||||
rf_ExecuteDAGState, rf_ProcessDAGState, rf_DecrAccessesCountState,
|
||||
rf_CleanupState, rf_LastState};
|
||||
|
||||
#if defined(__NetBSD__) && !defined(_KERNEL)
|
||||
/* XXX Gross hack to shutup gcc -- it complains that DefaultStates is not
|
||||
used when compiling this in userland.. I hate to burst its bubble, but
|
||||
DefaultStates is used all over the place here in the initialization of
|
||||
lots of data structures. GO */
|
||||
RF_AccessState_t *NothingAtAll = DefaultStates;
|
||||
#endif
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/* XXX Remove static so GCC doesn't complain about these being unused! */
|
||||
int distSpareYes = 1;
|
||||
int distSpareNo = 0;
|
||||
#else
|
||||
static int distSpareYes = 1;
|
||||
static int distSpareNo = 0;
|
||||
#endif
|
||||
#ifdef KERNEL
|
||||
#define RF_NK2(a,b)
|
||||
#else /* KERNEL */
|
||||
#define RF_NK2(a,b) a,b,
|
||||
#endif /* KERNEL */
|
||||
|
||||
#if RF_UTILITY > 0
|
||||
#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p)
|
||||
#else /* RF_UTILITY > 0 */
|
||||
#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p
|
||||
#endif /* RF_UTILITY > 0 */
|
||||
|
||||
static RF_LayoutSW_t mapsw[] = {
|
||||
/* parity declustering */
|
||||
{'T', "Parity declustering",
|
||||
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
|
||||
RF_NU(
|
||||
rf_ConfigureDeclustered,
|
||||
rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
|
||||
rf_IdentifyStripeDeclustered,
|
||||
rf_RaidFiveDagSelect,
|
||||
rf_MapSIDToPSIDDeclustered,
|
||||
rf_GetDefaultHeadSepLimitDeclustered,
|
||||
rf_GetDefaultNumFloatingReconBuffersDeclustered,
|
||||
NULL, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
|
||||
/* parity declustering with distributed sparing */
|
||||
{'D', "Distributed sparing parity declustering",
|
||||
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareYes)
|
||||
RF_NU(
|
||||
rf_ConfigureDeclusteredDS,
|
||||
rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
|
||||
rf_IdentifyStripeDeclustered,
|
||||
rf_RaidFiveDagSelect,
|
||||
rf_MapSIDToPSIDDeclustered,
|
||||
rf_GetDefaultHeadSepLimitDeclustered,
|
||||
rf_GetDefaultNumFloatingReconBuffersDeclustered,
|
||||
rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
RF_DISTRIBUTE_SPARE|RF_BD_DECLUSTERED)
|
||||
},
|
||||
|
||||
#if RF_INCLUDE_DECL_PQ > 0
|
||||
/* declustered P+Q */
|
||||
{'Q', "Declustered P+Q",
|
||||
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
|
||||
RF_NU(
|
||||
rf_ConfigureDeclusteredPQ,
|
||||
rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
|
||||
rf_IdentifyStripeDeclusteredPQ,
|
||||
rf_PQDagSelect,
|
||||
rf_MapSIDToPSIDDeclustered,
|
||||
rf_GetDefaultHeadSepLimitDeclustered,
|
||||
rf_GetDefaultNumFloatingReconBuffersPQ,
|
||||
NULL, NULL,
|
||||
NULL,
|
||||
rf_VerifyParityBasic,
|
||||
2,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_DECL_PQ > 0 */
|
||||
|
||||
#if RF_INCLUDE_RAID5_RS > 0
|
||||
/* RAID 5 with rotated sparing */
|
||||
{'R', "RAID Level 5 rotated sparing",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureRAID5_RS,
|
||||
rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL,
|
||||
rf_IdentifyStripeRAID5_RS,
|
||||
rf_RaidFiveDagSelect,
|
||||
rf_MapSIDToPSIDRAID5_RS,
|
||||
rf_GetDefaultHeadSepLimitRAID5,
|
||||
rf_GetDefaultNumFloatingReconBuffersRAID5,
|
||||
rf_GetNumSpareRUsRAID5_RS, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
RF_DISTRIBUTE_SPARE)
|
||||
},
|
||||
#endif /* RF_INCLUDE_RAID5_RS > 0 */
|
||||
|
||||
#if RF_INCLUDE_CHAINDECLUSTER > 0
|
||||
/* Chained Declustering */
|
||||
{'C', "Chained Declustering",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureChainDecluster,
|
||||
rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL,
|
||||
rf_IdentifyStripeChainDecluster,
|
||||
rf_RAIDCDagSelect,
|
||||
rf_MapSIDToPSIDChainDecluster,
|
||||
NULL,
|
||||
NULL,
|
||||
rf_GetNumSpareRUsChainDecluster, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
|
||||
|
||||
#if RF_INCLUDE_INTERDECLUSTER > 0
|
||||
/* Interleaved Declustering */
|
||||
{'I', "Interleaved Declustering",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureInterDecluster,
|
||||
rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL,
|
||||
rf_IdentifyStripeInterDecluster,
|
||||
rf_RAIDIDagSelect,
|
||||
rf_MapSIDToPSIDInterDecluster,
|
||||
rf_GetDefaultHeadSepLimitInterDecluster,
|
||||
rf_GetDefaultNumFloatingReconBuffersInterDecluster,
|
||||
rf_GetNumSpareRUsInterDecluster, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
RF_DISTRIBUTE_SPARE)
|
||||
},
|
||||
#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
|
||||
|
||||
#if RF_INCLUDE_RAID0 > 0
|
||||
/* RAID level 0 */
|
||||
{'0', "RAID Level 0",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureRAID0,
|
||||
rf_MapSectorRAID0, rf_MapParityRAID0, NULL,
|
||||
rf_IdentifyStripeRAID0,
|
||||
rf_RAID0DagSelect,
|
||||
rf_MapSIDToPSIDRAID0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, NULL,
|
||||
NULL,
|
||||
rf_VerifyParityRAID0,
|
||||
0,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_RAID0 > 0 */
|
||||
|
||||
#if RF_INCLUDE_RAID1 > 0
|
||||
/* RAID level 1 */
|
||||
{'1', "RAID Level 1",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureRAID1,
|
||||
rf_MapSectorRAID1, rf_MapParityRAID1, NULL,
|
||||
rf_IdentifyStripeRAID1,
|
||||
rf_RAID1DagSelect,
|
||||
rf_MapSIDToPSIDRAID1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, NULL,
|
||||
rf_SubmitReconBufferRAID1,
|
||||
rf_VerifyParityRAID1,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_RAID1 > 0 */
|
||||
|
||||
#if RF_INCLUDE_RAID4 > 0
|
||||
/* RAID level 4 */
|
||||
{'4', "RAID Level 4",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureRAID4,
|
||||
rf_MapSectorRAID4, rf_MapParityRAID4, NULL,
|
||||
rf_IdentifyStripeRAID4,
|
||||
rf_RaidFiveDagSelect,
|
||||
rf_MapSIDToPSIDRAID4,
|
||||
rf_GetDefaultHeadSepLimitRAID4,
|
||||
rf_GetDefaultNumFloatingReconBuffersRAID4,
|
||||
NULL, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_RAID4 > 0 */
|
||||
|
||||
#if RF_INCLUDE_RAID5 > 0
|
||||
/* RAID level 5 */
|
||||
{'5', "RAID Level 5",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureRAID5,
|
||||
rf_MapSectorRAID5, rf_MapParityRAID5, NULL,
|
||||
rf_IdentifyStripeRAID5,
|
||||
rf_RaidFiveDagSelect,
|
||||
rf_MapSIDToPSIDRAID5,
|
||||
rf_GetDefaultHeadSepLimitRAID5,
|
||||
rf_GetDefaultNumFloatingReconBuffersRAID5,
|
||||
NULL, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
rf_VerifyParityBasic,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_RAID5 > 0 */
|
||||
|
||||
#if RF_INCLUDE_EVENODD > 0
|
||||
/* Evenodd */
|
||||
{'E', "EvenOdd",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureEvenOdd,
|
||||
rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd,
|
||||
rf_IdentifyStripeEvenOdd,
|
||||
rf_EODagSelect,
|
||||
rf_MapSIDToPSIDRAID5,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, NULL,
|
||||
NULL, /* no reconstruction, yet */
|
||||
rf_VerifyParityEvenOdd,
|
||||
2,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_EVENODD > 0 */
|
||||
|
||||
#if RF_INCLUDE_EVENODD > 0
|
||||
/* Declustered Evenodd */
|
||||
{'e', "Declustered EvenOdd",
|
||||
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
|
||||
RF_NU(
|
||||
rf_ConfigureDeclusteredPQ,
|
||||
rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
|
||||
rf_IdentifyStripeDeclusteredPQ,
|
||||
rf_EODagSelect,
|
||||
rf_MapSIDToPSIDRAID5,
|
||||
rf_GetDefaultHeadSepLimitDeclustered,
|
||||
rf_GetDefaultNumFloatingReconBuffersPQ,
|
||||
NULL, NULL,
|
||||
NULL, /* no reconstruction, yet */
|
||||
rf_VerifyParityEvenOdd,
|
||||
2,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_EVENODD > 0 */
|
||||
|
||||
#if RF_INCLUDE_PARITYLOGGING > 0
|
||||
/* parity logging */
|
||||
{'L', "Parity logging",
|
||||
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
|
||||
RF_NU(
|
||||
rf_ConfigureParityLogging,
|
||||
rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL,
|
||||
rf_IdentifyStripeParityLogging,
|
||||
rf_ParityLoggingDagSelect,
|
||||
rf_MapSIDToPSIDParityLogging,
|
||||
rf_GetDefaultHeadSepLimitParityLogging,
|
||||
rf_GetDefaultNumFloatingReconBuffersParityLogging,
|
||||
NULL, NULL,
|
||||
rf_SubmitReconBufferBasic,
|
||||
NULL,
|
||||
1,
|
||||
DefaultStates,
|
||||
0)
|
||||
},
|
||||
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
|
||||
|
||||
/* end-of-list marker */
|
||||
{ '\0', NULL,
|
||||
RF_NK2(NULL, NULL)
|
||||
RF_NU(
|
||||
NULL,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
0)
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Find the layout switch entry whose parityConfig matches the argument.
 * Walks mapsw until the end-of-list marker (parityConfig == 0) and
 * returns NULL when no entry matches.
 */
RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig)
{
	RF_LayoutSW_t *entry;

	for (entry = mapsw; entry->parityConfig; entry++) {
		if (entry->parityConfig == parityConfig) {
			RF_ASSERT(entry->parityConfig == parityConfig);
			return(entry);
		}
	}

	/* reached the terminator without a match */
	return(NULL);
}
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* ConfigureLayout --
|
||||
*
|
||||
* read the configuration file and set up the RAID layout parameters. After reading
|
||||
* common params, invokes the layout-specific configuration routine to finish
|
||||
* the configuration.
|
||||
*
|
||||
****************************************************************************************/
|
||||
int rf_ConfigureLayout(
|
||||
RF_ShutdownList_t **listp,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr)
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_ParityConfig_t parityConfig;
|
||||
RF_LayoutSW_t *p;
|
||||
int retval;
|
||||
|
||||
layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU;
|
||||
layoutPtr->SUsPerPU = cfgPtr->SUsPerPU;
|
||||
layoutPtr->SUsPerRU = cfgPtr->SUsPerRU;
|
||||
parityConfig = cfgPtr->parityConfig;
|
||||
|
||||
layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
p = rf_GetLayout(parityConfig);
|
||||
if (p == NULL) {
|
||||
RF_ERRORMSG1("Unknown parity configuration '%c'", parityConfig);
|
||||
return(EINVAL);
|
||||
}
|
||||
RF_ASSERT(p->parityConfig == parityConfig);
|
||||
layoutPtr->map = p;
|
||||
|
||||
/* initialize the specific layout */
|
||||
|
||||
retval = (p->Configure)(listp, raidPtr, cfgPtr);
|
||||
|
||||
if (retval)
|
||||
return(retval);
|
||||
|
||||
layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector;
|
||||
raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
|
||||
|
||||
if (rf_forceNumFloatingReconBufs >= 0) {
|
||||
raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs;
|
||||
}
|
||||
else {
|
||||
raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr);
|
||||
}
|
||||
|
||||
if (rf_forceHeadSepLimit >= 0) {
|
||||
raidPtr->headSepLimit = rf_forceHeadSepLimit;
|
||||
}
|
||||
else {
|
||||
raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr);
|
||||
}
|
||||
|
||||
printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n",
|
||||
layoutPtr->map->configName,
|
||||
(unsigned long)raidPtr->totalSectors,
|
||||
(unsigned long)(raidPtr->totalSectors / 1024 * (1<<raidPtr->logBytesPerSector) / 1024));
|
||||
if (raidPtr->headSepLimit >= 0) {
|
||||
printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n",
|
||||
layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs, (long)raidPtr->headSepLimit);
|
||||
}
|
||||
else {
|
||||
printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n",
|
||||
layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs);
|
||||
}
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/* typically there is a 1-1 mapping between stripes and parity stripes.
|
||||
* however, the declustering code supports packing multiple stripes into
|
||||
* a single parity stripe, so as to increase the size of the reconstruction
|
||||
* unit without affecting the size of the stripe unit. This routine finds
|
||||
* the parity stripe identifier associated with a stripe ID. There is also
|
||||
* a RaidAddressToParityStripeID macro in layout.h
|
||||
*/
|
||||
RF_StripeNum_t rf_MapStripeIDToParityStripeID(layoutPtr, stripeID, which_ru)
|
||||
RF_RaidLayout_t *layoutPtr;
|
||||
RF_StripeNum_t stripeID;
|
||||
RF_ReconUnitNum_t *which_ru;
|
||||
{
|
||||
RF_StripeNum_t parityStripeID;
|
||||
|
||||
/* quick exit in the common case of SUsPerPU==1 */
|
||||
if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) {
|
||||
*which_ru = 0;
|
||||
return(stripeID);
|
||||
}
|
||||
else {
|
||||
(layoutPtr->map->MapSIDToPSID)(layoutPtr, stripeID, &parityStripeID, which_ru);
|
||||
}
|
||||
return(parityStripeID);
|
||||
}
|
||||
#endif /* RF_UTILITY == 0 */
|
|
@ -0,0 +1,492 @@
|
|||
/* $NetBSD: rf_layout.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_layout.h -- header file defining layout data structures
|
||||
*/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_layout.h,v
|
||||
* Revision 1.50 1996/11/05 21:10:40 jimz
|
||||
* failed pda generalization
|
||||
*
|
||||
* Revision 1.49 1996/07/29 14:05:12 jimz
|
||||
* fix numPUs/numRUs confusion (everything is now numRUs)
|
||||
* clean up some commenting, return values
|
||||
*
|
||||
* Revision 1.48 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.47 1996/07/18 22:57:14 jimz
|
||||
* port simulator to AIX
|
||||
*
|
||||
* Revision 1.46 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.45 1996/07/11 19:08:00 jimz
|
||||
* generalize reconstruction mechanism
|
||||
* allow raid1 reconstructs via copyback (done with array
|
||||
* quiesced, not online, therefore not disk-directed)
|
||||
*
|
||||
* Revision 1.44 1996/06/19 22:23:01 jimz
|
||||
* parity verification is now a layout-configurable thing
|
||||
* not all layouts currently support it (correctly, anyway)
|
||||
*
|
||||
* Revision 1.43 1996/06/19 17:53:48 jimz
|
||||
* move GetNumSparePUs, InstallSpareTable ops into layout switch
|
||||
*
|
||||
* Revision 1.42 1996/06/19 14:56:48 jimz
|
||||
* move layout-specific config parsing hooks into RF_LayoutSW_t
|
||||
* table in rf_layout.c
|
||||
*
|
||||
* Revision 1.41 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.40 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.39 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.38 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.37 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.36 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.35 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.34 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.33 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.32 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.31 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.30 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.29 1995/12/01 19:16:19 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.28 1995/11/28 21:26:49 amiri
|
||||
* defined a declustering flag RF_BD_DECLUSTERED
|
||||
*
|
||||
* Revision 1.27 1995/11/17 19:00:59 wvcii
|
||||
* created MapQ entry in switch table
|
||||
* added prototyping to MapParity
|
||||
*
|
||||
* Revision 1.26 1995/11/07 15:40:27 wvcii
|
||||
* changed prototype of SeclectionFunc in mapsw
|
||||
* function no longer returns numHdrSucc, numTermAnt
|
||||
*
|
||||
* Revision 1.25 1995/10/12 20:57:08 arw
|
||||
* added lots of comments
|
||||
*
|
||||
* Revision 1.24 1995/10/12 16:04:08 jimz
|
||||
* added config name to mapsw
|
||||
*
|
||||
* Revision 1.23 1995/07/26 03:28:31 robby
|
||||
* intermediary checkin
|
||||
*
|
||||
* Revision 1.22 1995/07/10 20:51:08 robby
|
||||
* added to the asm info for the virtual striping locks
|
||||
*
|
||||
* Revision 1.21 1995/07/10 16:57:47 robby
|
||||
* updated alloclistelem struct to the correct struct name
|
||||
*
|
||||
* Revision 1.20 1995/07/08 20:06:11 rachad
|
||||
* *** empty log message ***
|
||||
*
|
||||
* Revision 1.19 1995/07/08 18:05:39 rachad
|
||||
* Linked up Claudsons code with the real cache
|
||||
*
|
||||
* Revision 1.18 1995/07/06 14:29:36 robby
|
||||
* added defaults states list to the layout switch
|
||||
*
|
||||
* Revision 1.17 1995/06/23 13:40:14 robby
|
||||
* updeated to prototypes in rf_layout.h
|
||||
*
|
||||
* Revision 1.16 1995/06/08 22:11:03 holland
|
||||
* bug fixes related to mutiple-row arrays
|
||||
*
|
||||
* Revision 1.15 1995/05/24 21:43:23 wvcii
|
||||
* added field numParityLogCol to RaidLayout
|
||||
*
|
||||
* Revision 1.14 95/05/02 22:46:53 holland
|
||||
* minor code cleanups.
|
||||
*
|
||||
* Revision 1.13 1995/05/02 12:48:01 holland
|
||||
* eliminated some unused code.
|
||||
*
|
||||
* Revision 1.12 1995/05/01 13:28:00 holland
|
||||
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
|
||||
*
|
||||
* Revision 1.11 1995/03/15 20:01:17 holland
|
||||
* added REMAP and DONT_REMAP
|
||||
*
|
||||
* Revision 1.10 1995/03/09 19:54:11 rachad
|
||||
* Added suport for threadless simulator
|
||||
*
|
||||
* Revision 1.9 1995/03/03 21:48:58 holland
|
||||
* minor changes.
|
||||
*
|
||||
* Revision 1.8 1995/03/01 20:25:48 holland
|
||||
* kernelization changes
|
||||
*
|
||||
* Revision 1.7 1995/02/03 22:31:36 holland
|
||||
* many changes related to kernelization
|
||||
*
|
||||
* Revision 1.6 1995/01/30 14:53:46 holland
|
||||
* extensive changes related to making DoIO non-blocking
|
||||
*
|
||||
* Revision 1.5 1995/01/24 23:58:46 holland
|
||||
* multi-way recon XOR, plus various small changes
|
||||
*
|
||||
* Revision 1.4 1995/01/04 19:28:35 holland
|
||||
* corrected comments around mapsw
|
||||
*
|
||||
* Revision 1.3 1994/11/28 22:15:45 danner
|
||||
* Added type field to the physdiskaddr struct.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_LAYOUT_H_
|
||||
#define _RF__RF_LAYOUT_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_archs.h"
|
||||
#include "rf_alloclist.h"
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* This structure identifies all layout-specific operations and parameters.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
typedef struct RF_LayoutSW_s {
|
||||
RF_ParityConfig_t parityConfig;
|
||||
char *configName;
|
||||
|
||||
#ifndef KERNEL
|
||||
/* layout-specific parsing */
|
||||
int (*MakeLayoutSpecific)(FILE *fp, RF_Config_t *cfgPtr, void *arg);
|
||||
void *makeLayoutSpecificArg;
|
||||
#endif /* !KERNEL */
|
||||
|
||||
#if RF_UTILITY == 0
|
||||
/* initialization routine */
|
||||
int (*Configure)(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
|
||||
|
||||
/* routine to map RAID sector address -> physical (row, col, offset) */
|
||||
void (*MapSector)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
|
||||
/* routine to map RAID sector address -> physical (r,c,o) of parity unit */
|
||||
void (*MapParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
|
||||
/* routine to map RAID sector address -> physical (r,c,o) of Q unit */
|
||||
void (*MapQ)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t *row,
|
||||
RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
|
||||
|
||||
/* routine to identify the disks comprising a stripe */
|
||||
void (*IdentifyStripe)(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
|
||||
/* routine to select a dag */
|
||||
void (*SelectionFunc)(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_VoidFuncPtr *);
|
||||
#if 0
|
||||
void (**createFunc)(RF_Raid_t *,
|
||||
RF_AccessStripeMap_t *,
|
||||
RF_DagHeader_t *, void *,
|
||||
RF_RaidAccessFlags_t,
|
||||
RF_AllocListElem_t *));
|
||||
|
||||
#endif
|
||||
|
||||
/* map a stripe ID to a parity stripe ID. This is typically the identity mapping */
|
||||
void (*MapSIDToPSID)(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
|
||||
RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru);
|
||||
|
||||
/* get default head separation limit (may be NULL) */
|
||||
RF_HeadSepLimit_t (*GetDefaultHeadSepLimit)(RF_Raid_t *raidPtr);
|
||||
|
||||
/* get default num recon buffers (may be NULL) */
|
||||
int (*GetDefaultNumFloatingReconBuffers)(RF_Raid_t *raidPtr);
|
||||
|
||||
/* get number of spare recon units (may be NULL) */
|
||||
RF_ReconUnitCount_t (*GetNumSpareRUs)(RF_Raid_t *raidPtr);
|
||||
|
||||
/* spare table installation (may be NULL) */
|
||||
int (*InstallSpareTable)(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
|
||||
|
||||
/* recon buffer submission function */
|
||||
int (*SubmitReconBuffer)(RF_ReconBuffer_t *rbuf, int keep_it,
|
||||
int use_committed);
|
||||
|
||||
/*
|
||||
* verify that parity information for a stripe is correct
|
||||
* see rf_parityscan.h for return vals
|
||||
*/
|
||||
int (*VerifyParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
|
||||
RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
|
||||
|
||||
/* number of faults tolerated by this mapping */
|
||||
int faultsTolerated;
|
||||
|
||||
/* states to step through in an access. Must end with "LastState".
|
||||
* The default is DefaultStates in rf_layout.c */
|
||||
RF_AccessState_t *states;
|
||||
|
||||
RF_AccessStripeMapFlags_t flags;
|
||||
#endif /* RF_UTILITY == 0 */
|
||||
} RF_LayoutSW_t;
|
||||
|
||||
/* enables remapping to spare location under dist sparing */
|
||||
#define RF_REMAP 1
|
||||
#define RF_DONT_REMAP 0
|
||||
|
||||
/*
|
||||
* Flags values for RF_AccessStripeMapFlags_t
|
||||
*/
|
||||
#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */
|
||||
#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs that support it */
|
||||
#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */
|
||||
|
||||
/*************************************************************************
|
||||
*
|
||||
* this structure forms the layout component of the main Raid
|
||||
* structure. It describes everything needed to define and perform
|
||||
* the mapping of logical RAID addresses <-> physical disk addresses.
|
||||
*
|
||||
*************************************************************************/
|
||||
struct RF_RaidLayout_s {
|
||||
/* configuration parameters */
|
||||
RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one stripe unit */
|
||||
RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */
|
||||
RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction unit */
|
||||
|
||||
/* redundant-but-useful info computed from the above, used in all layouts */
|
||||
RF_StripeCount_t numStripe; /* total number of stripes in the array */
|
||||
RF_SectorCount_t dataSectorsPerStripe;
|
||||
RF_StripeCount_t dataStripeUnitsPerDisk;
|
||||
u_int bytesPerStripeUnit;
|
||||
u_int dataBytesPerStripe;
|
||||
RF_StripeCount_t numDataCol; /* number of SUs of data per stripe (name here is a la RAID4) */
|
||||
RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. Always 1 for now */
|
||||
RF_StripeCount_t numParityLogCol; /* number of SUs of parity log per stripe. Always 1 for now */
|
||||
RF_StripeCount_t stripeUnitsPerDisk;
|
||||
|
||||
RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and information */
|
||||
void *layoutSpecificInfo; /* ptr to a structure holding layout-specific params */
|
||||
};
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* The mapping code returns a pointer to a list of AccessStripeMap structures, which
|
||||
* describes all the mapping information about an access. The list contains one
|
||||
* AccessStripeMap structure per stripe touched by the access. Each element in the list
|
||||
* contains a stripe identifier and a pointer to a list of PhysDiskAddr structures. Each
|
||||
* element in this latter list describes the physical location of a stripe unit accessed
|
||||
* within the corresponding stripe.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
#define RF_PDA_TYPE_DATA 0
|
||||
#define RF_PDA_TYPE_PARITY 1
|
||||
#define RF_PDA_TYPE_Q 2
|
||||
|
||||
struct RF_PhysDiskAddr_s {
|
||||
RF_RowCol_t row,col; /* disk identifier */
|
||||
RF_SectorNum_t startSector; /* sector offset into the disk */
|
||||
RF_SectorCount_t numSector; /* number of sectors accessed */
|
||||
int type; /* used by higher levels: currently, data, parity, or q */
|
||||
caddr_t bufPtr; /* pointer to buffer supplying/receiving data */
|
||||
RF_RaidAddr_t raidAddress; /* raid address corresponding to this physical disk address */
|
||||
RF_PhysDiskAddr_t *next;
|
||||
};
|
||||
|
||||
#define RF_MAX_FAILED_PDA RF_MAXCOL
|
||||
|
||||
struct RF_AccessStripeMap_s {
|
||||
RF_StripeNum_t stripeID; /* the stripe index */
|
||||
RF_RaidAddr_t raidAddress; /* the starting raid address within this stripe */
|
||||
RF_RaidAddr_t endRaidAddress; /* raid address one sector past the end of the access */
|
||||
RF_SectorCount_t totalSectorsAccessed; /* total num sectors identified in physInfo list */
|
||||
RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in physInfo list */
|
||||
int numDataFailed; /* number of failed data disks accessed */
|
||||
int numParityFailed; /* number of failed parity disks accessed (0 or 1) */
|
||||
int numQFailed; /* number of failed Q units accessed (0 or 1) */
|
||||
RF_AccessStripeMapFlags_t flags; /* various flags */
|
||||
#if 0
|
||||
RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */
|
||||
RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA that has failed, if any */
|
||||
#else
|
||||
int numFailedPDAs; /* number of failed phys addrs */
|
||||
RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys addrs */
|
||||
#endif
|
||||
RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */
|
||||
RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the parity (P of P + Q ) */
|
||||
RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of P + Q */
|
||||
RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */
|
||||
RF_RowCol_t origRow; /* the original row: we may redirect the acc to a different row */
|
||||
RF_AccessStripeMap_t *next;
|
||||
};
|
||||
|
||||
/*
 * Bit values for the `flags' field of an access stripe map.
 */
/* allows large-write creation code to redirect failed accs */
#define RF_ASM_REDIR_LARGE_WRITE   0x00000001
/* allows us to detect recursive calls to the bailout write dag */
#define RF_ASM_BAILOUT_DAG_USED    0x00000002
/* we've acquired the lock on the first parity range in this parity stripe */
#define RF_ASM_FLAGS_LOCK_TRIED    0x00000004
/* we've acquired the lock on the 2nd parity range in this parity stripe */
#define RF_ASM_FLAGS_LOCK_TRIED2   0x00000008
/* we've done the force-recon call on this parity stripe */
#define RF_ASM_FLAGS_FORCE_TRIED   0x00000010
/* we blocked recon => we must unblock it later */
#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020
|
||||
|
||||
struct RF_AccessStripeMapHeader_s {
|
||||
RF_StripeCount_t numStripes; /* total number of stripes touched by this acc */
|
||||
RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. Also used for making lists */
|
||||
RF_AccessStripeMapHeader_t *next;
|
||||
};
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* various routines mapping addresses in the RAID address space. These work across
|
||||
* all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/* return the identifier of the stripe unit containing the given address */
#define rf_RaidAddressToStripeUnitID(lp, addr) \
  ( ((addr) / (lp)->sectorsPerStripeUnit) )

/* return the identifier of the stripe containing the given stripe unit id */
#define rf_StripeUnitIDToStripeID(lp, suid) \
  ( (suid) / (lp)->numDataCol )

/*
 * return the identifier of the stripe containing the given address:
 * address -> stripe unit id -> stripe id
 */
#define rf_RaidAddressToStripeID(lp, addr) \
  rf_StripeUnitIDToStripeID( lp, rf_RaidAddressToStripeUnitID(lp, addr) )

/* return the raid address of the start of the indicated stripe ID */
#define rf_StripeIDToRaidAddress(lp, sid) \
  ( ((sid) * (lp)->sectorsPerStripeUnit) * (lp)->numDataCol )
|
||||
|
||||
/*
 * Stripe boundaries.  "Next" is the first boundary strictly beyond the
 * given address; "Prev" is the start of the stripe containing it.
 */
#define rf_RaidAddressOfNextStripeBoundary(lp, addr) \
  ( (((addr) / (lp)->dataSectorsPerStripe) + 1) * (lp)->dataSectorsPerStripe )

#define rf_RaidAddressOfPrevStripeBoundary(lp, addr) \
  ( ((addr) / (lp)->dataSectorsPerStripe) * (lp)->dataSectorsPerStripe )

/*
 * Stripe unit boundaries, same conventions as above.
 */
#define rf_RaidAddressOfNextStripeUnitBoundary(lp, addr) \
  ( (((addr) / (lp)->sectorsPerStripeUnit) + 1L) * (lp)->sectorsPerStripeUnit )

#define rf_RaidAddressOfPrevStripeUnitBoundary(lp, addr) \
  ( ((addr) / (lp)->sectorsPerStripeUnit) * (lp)->sectorsPerStripeUnit )

/* returns the offset into the stripe.  used by RaidAddressStripeAligned */
#define rf_RaidAddressStripeOffset(lp, addr) \
  ( (addr) % ((lp)->dataSectorsPerStripe) )

/* returns the offset into the stripe unit */
#define rf_StripeUnitOffset(lp, addr) \
  ( (addr) % ((lp)->sectorsPerStripeUnit) )

/* returns nonzero if the given RAID address is stripe-aligned */
#define rf_RaidAddressStripeAligned(lp, addr) \
  ( rf_RaidAddressStripeOffset(lp, addr) == 0 )

/* returns nonzero if the given address is stripe-unit aligned */
#define rf_StripeUnitAligned(lp, addr) \
  ( rf_StripeUnitOffset(lp, addr) == 0 )
|
||||
|
||||
/*
 * convert an address expressed in RAID blocks to/from an addr expressed
 * in bytes, by shifting with the array's logBytesPerSector
 */
#define rf_RaidAddressToByte(rp, addr) \
  ( (addr) << ( (rp)->logBytesPerSector ) )

#define rf_ByteToRaidAddress(rp, addr) \
  ( (addr) >> ( (rp)->logBytesPerSector ) )

/*
 * convert a raid address to/from a parity stripe ID.
 *
 * Going to a raid address is a plain multiplication, since we're asking
 * for the address of the first sector in the parity stripe.  Going to a
 * parity stripe ID is more complex, since stripes are not contiguously
 * allocated in parity stripes; that direction is delegated to
 * rf_MapStripeIDToParityStripeID().
 */
#define rf_RaidAddressToParityStripeID(lp, addr, ru_num) \
  rf_MapStripeIDToParityStripeID( (lp), rf_RaidAddressToStripeID( (lp), (addr) ), (ru_num) )

#define rf_ParityStripeIDToRaidAddress(lp, psid) \
  ( (psid) * (lp)->SUsPerPU * (lp)->numDataCol * (lp)->sectorsPerStripeUnit )
|
||||
|
||||
RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig);
|
||||
int rf_ConfigureLayout(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
RF_StripeNum_t rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID, RF_ReconUnitNum_t *which_ru);
|
||||
|
||||
#endif /* !_RF__RF_LAYOUT_H_ */
|
|
@ -0,0 +1,975 @@
|
|||
/* $NetBSD: rf_map.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* map.c -- main code for mapping RAID addresses to physical disk addresses
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_map.c,v
|
||||
* Revision 1.53 1996/11/05 21:10:40 jimz
|
||||
* failed pda generalization
|
||||
*
|
||||
* Revision 1.52 1996/08/20 19:58:39 jimz
|
||||
* initialize numParityFailed and numQFailed to 0 in MarkFailuresInASMList
|
||||
*
|
||||
* Revision 1.51 1996/08/19 22:26:31 jimz
|
||||
* add Chang's bugfixes for double-disk failures in MarkFailuresInASMList
|
||||
*
|
||||
* Revision 1.50 1996/08/19 21:38:06 jimz
|
||||
* stripeOffset was uninitialized in CheckStripeForFailures
|
||||
*
|
||||
* Revision 1.49 1996/07/31 15:34:56 jimz
|
||||
* evenodd changes; bugfixes for double-degraded archs, generalize
|
||||
* some formerly PQ-only functions
|
||||
*
|
||||
* Revision 1.48 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.47 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.46 1996/06/10 12:50:57 jimz
|
||||
* Add counters to freelists to track number of allocations, frees,
|
||||
* grows, max size, etc. Adjust a couple sets of PRIME params based
|
||||
* on the results.
|
||||
*
|
||||
* Revision 1.45 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.44 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.43 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.42 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.41 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.40 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.39 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.38 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.37 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.36 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.35 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.34 1996/05/20 16:14:45 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.33 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.32 1996/05/17 00:51:47 jimz
|
||||
* reformat for readability
|
||||
*
|
||||
* Revision 1.31 1996/05/16 23:06:26 jimz
|
||||
* convert asmhdr to use RF_FREELIST stuff
|
||||
*
|
||||
* Revision 1.30 1996/05/16 19:09:42 jimz
|
||||
* grow init asm freelist to 32
|
||||
*
|
||||
* Revision 1.29 1996/05/16 15:27:55 jimz
|
||||
* prime freelist pumps for asm and pda lists
|
||||
*
|
||||
* Revision 1.28 1996/05/02 14:58:35 jimz
|
||||
* legibility cleanup
|
||||
*
|
||||
* Revision 1.27 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.26 1995/12/01 19:25:06 root
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.25 1995/11/17 19:01:57 wvcii
|
||||
* added call to MapQ in two fault tolerant case
|
||||
*
|
||||
* Revision 1.24 1995/11/17 15:10:53 wvcii
|
||||
* fixed bug in ASMCheckStatus - ASSERT was using disk sector addresses
|
||||
* rather than raidAddress
|
||||
*
|
||||
* Revision 1.23 1995/07/26 03:26:51 robby
|
||||
* map the allocation and freeing routines for some stuff non-static
|
||||
*
|
||||
* Revision 1.22 1995/06/28 09:33:45 holland
|
||||
* bug fixes related to dist sparing and multiple-row arrays
|
||||
*
|
||||
* Revision 1.21 1995/06/28 04:51:08 holland
|
||||
* added some asserts against zero-length accesses
|
||||
*
|
||||
* Revision 1.20 1995/06/23 13:40:06 robby
|
||||
* updeated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_map.h"
|
||||
#include "rf_freelist.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
static void rf_FreePDAList(RF_PhysDiskAddr_t *start, RF_PhysDiskAddr_t *end, int count);
|
||||
static void rf_FreeASMList(RF_AccessStripeMap_t *start, RF_AccessStripeMap_t *end,
|
||||
int count);
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* MapAccess -- main 1st order mapping routine.
|
||||
*
|
||||
* Maps an access in the RAID address space to the corresponding set of physical disk
|
||||
* addresses. The result is returned as a list of AccessStripeMap structures, one per
|
||||
* stripe accessed. Each ASM structure contains a pointer to a list of PhysDiskAddr
|
||||
* structures, which describe the physical locations touched by the user access. Note
|
||||
* that this routine returns only static mapping information, i.e. the list of physical
|
||||
* addresses returned does not necessarily identify the set of physical locations that
|
||||
* will actually be read or written.
|
||||
*
|
||||
* The routine also maps the parity. The physical disk location returned always
|
||||
* indicates the entire parity unit, even when only a subset of it is being accessed.
|
||||
* This is because an access that is not stripe unit aligned but that spans a stripe
|
||||
* unit boundary may require access to two distinct portions of the parity unit, and we
|
||||
* can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm
|
||||
* selection code to decide what subset of the parity unit to access.
|
||||
*
|
||||
* Note that addresses in the RAID address space must always be maintained as
|
||||
* longs, instead of ints.
|
||||
*
|
||||
* This routine returns NULL if numBlocks is 0
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
RF_AccessStripeMapHeader_t *rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RaidAddr_t raidAddress; /* starting address in RAID address space */
|
||||
RF_SectorCount_t numBlocks; /* number of blocks in RAID address space to access */
|
||||
caddr_t buffer; /* buffer to supply/receive data */
|
||||
int remap; /* 1 => remap addresses to spare space */
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_AccessStripeMapHeader_t *asm_hdr = NULL;
|
||||
RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
|
||||
int faultsTolerated = layoutPtr->map->faultsTolerated;
|
||||
RF_RaidAddr_t startAddress = raidAddress; /* we'll change raidAddress along the way */
|
||||
RF_RaidAddr_t endAddress = raidAddress + numBlocks;
|
||||
RF_RaidDisk_t **disks = raidPtr->Disks;
|
||||
|
||||
RF_PhysDiskAddr_t *pda_p, *pda_q;
|
||||
RF_StripeCount_t numStripes = 0;
|
||||
RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress;
|
||||
RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
|
||||
RF_StripeCount_t totStripes;
|
||||
RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
|
||||
RF_AccessStripeMap_t *asmList, *t_asm;
|
||||
RF_PhysDiskAddr_t *pdaList, *t_pda;
|
||||
|
||||
/* allocate all the ASMs and PDAs up front */
|
||||
lastRaidAddr = raidAddress + numBlocks - 1 ;
|
||||
stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
|
||||
lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
|
||||
totStripes = lastSID - stripeID + 1;
|
||||
SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
|
||||
lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
|
||||
|
||||
asmList = rf_AllocASMList(totStripes);
|
||||
pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) per stripe for parity */
|
||||
|
||||
if (raidAddress+numBlocks > raidPtr->totalSectors) {
|
||||
RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n",
|
||||
(int)raidAddress);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
if (rf_mapDebug)
|
||||
rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
|
||||
for (; raidAddress < endAddress; ) {
|
||||
/* make the next stripe structure */
|
||||
RF_ASSERT(asmList);
|
||||
t_asm = asmList;
|
||||
asmList = asmList->next;
|
||||
bzero((char *)t_asm, sizeof(RF_AccessStripeMap_t));
|
||||
if (!asm_p)
|
||||
asm_list = asm_p = t_asm;
|
||||
else {
|
||||
asm_p->next = t_asm;
|
||||
asm_p = asm_p->next;
|
||||
}
|
||||
numStripes++;
|
||||
|
||||
/* map SUs from current location to the end of the stripe */
|
||||
asm_p->stripeID = /*rf_RaidAddressToStripeID(layoutPtr, raidAddress)*/ stripeID++;
|
||||
stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
|
||||
stripeEndAddress = RF_MIN(endAddress,stripeRealEndAddress );
|
||||
asm_p->raidAddress = raidAddress;
|
||||
asm_p->endRaidAddress = stripeEndAddress;
|
||||
|
||||
/* map each stripe unit in the stripe */
|
||||
pda_p = NULL;
|
||||
startAddrWithinStripe = raidAddress; /* Raid addr of start of portion of access that is within this stripe */
|
||||
for (; raidAddress < stripeEndAddress; ) {
|
||||
RF_ASSERT(pdaList);
|
||||
t_pda = pdaList;
|
||||
pdaList = pdaList->next;
|
||||
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
if (!pda_p)
|
||||
asm_p->physInfo = pda_p = t_pda;
|
||||
else {
|
||||
pda_p->next = t_pda;
|
||||
pda_p = pda_p->next;
|
||||
}
|
||||
|
||||
pda_p->type = RF_PDA_TYPE_DATA;
|
||||
(layoutPtr->map->MapSector)(raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
|
||||
|
||||
/* mark any failures we find. failedPDA is don't-care if there is more than one failure */
|
||||
pda_p->raidAddress = raidAddress; /* the RAID address corresponding to this physical disk address */
|
||||
nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress);
|
||||
pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress;
|
||||
RF_ASSERT(pda_p->numSector != 0);
|
||||
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,0);
|
||||
pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress));
|
||||
asm_p->totalSectorsAccessed += pda_p->numSector;
|
||||
asm_p->numStripeUnitsAccessed++;
|
||||
asm_p->origRow = pda_p->row; /* redundant but harmless to do this in every loop iteration */
|
||||
|
||||
raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
|
||||
}
|
||||
|
||||
/* Map the parity. At this stage, the startSector and numSector fields
|
||||
* for the parity unit are always set to indicate the entire parity unit.
|
||||
* We may modify this after mapping the data portion.
|
||||
*/
|
||||
switch (faultsTolerated)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1: /* single fault tolerant */
|
||||
RF_ASSERT(pdaList);
|
||||
t_pda = pdaList;
|
||||
pdaList = pdaList->next;
|
||||
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
pda_p = asm_p->parityInfo = t_pda;
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
(layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
|
||||
&(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
|
||||
pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
|
||||
/* raidAddr may be needed to find unit to redirect to */
|
||||
pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
|
||||
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1);
|
||||
rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
|
||||
|
||||
break;
|
||||
case 2: /* two fault tolerant */
|
||||
RF_ASSERT(pdaList && pdaList->next);
|
||||
t_pda = pdaList;
|
||||
pdaList = pdaList->next;
|
||||
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
pda_p = asm_p->parityInfo = t_pda;
|
||||
pda_p->type = RF_PDA_TYPE_PARITY;
|
||||
t_pda = pdaList;
|
||||
pdaList = pdaList->next;
|
||||
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
pda_q = asm_p->qInfo = t_pda;
|
||||
pda_q->type = RF_PDA_TYPE_Q;
|
||||
(layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
|
||||
&(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
|
||||
(layoutPtr->map->MapQ)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
|
||||
&(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap);
|
||||
pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
|
||||
/* raidAddr may be needed to find unit to redirect to */
|
||||
pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
|
||||
pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
|
||||
/* failure mode stuff */
|
||||
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1);
|
||||
rf_ASMCheckStatus(raidPtr,pda_q,asm_p,disks,1);
|
||||
rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
|
||||
rf_ASMParityAdjust(asm_p->qInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
RF_ASSERT(asmList == NULL && pdaList == NULL);
|
||||
/* make the header structure */
|
||||
asm_hdr = rf_AllocAccessStripeMapHeader();
|
||||
RF_ASSERT(numStripes == totStripes);
|
||||
asm_hdr->numStripes = numStripes;
|
||||
asm_hdr->stripeMap = asm_list;
|
||||
|
||||
if (rf_mapDebug)
|
||||
rf_PrintAccessStripeMap(asm_hdr);
|
||||
return(asm_hdr);
|
||||
}
|
||||
|
||||
/*****************************************************************************************
|
||||
* This routine walks through an ASM list and marks the PDAs that have failed.
|
||||
* It's called only when a disk failure causes an in-flight DAG to fail.
|
||||
* The parity may consist of two components, but we want to use only one failedPDA
|
||||
* pointer. Thus we set failedPDA to point to the first parity component, and rely
|
||||
* on the rest of the code to do the right thing with this.
|
||||
****************************************************************************************/
|
||||
|
||||
void rf_MarkFailuresInASMList(raidPtr, asm_h)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_AccessStripeMapHeader_t *asm_h;
|
||||
{
|
||||
RF_RaidDisk_t **disks = raidPtr->Disks;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
|
||||
for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) {
|
||||
asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0;
|
||||
asmap->numFailedPDAs = 0;
|
||||
bzero((char *)asmap->failedPDAs,
|
||||
RF_MAX_FAILED_PDA*sizeof(RF_PhysDiskAddr_t *));
|
||||
for (pda = asmap->physInfo; pda; pda=pda->next) {
|
||||
if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
|
||||
printf("DEAD DISK BOGUSLY DETECTED!!\n");
|
||||
asmap->numDataFailed++;
|
||||
asmap->failedPDAs[asmap->numFailedPDAs] = pda;
|
||||
asmap->numFailedPDAs++;
|
||||
}
|
||||
}
|
||||
pda = asmap->parityInfo;
|
||||
if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
|
||||
asmap->numParityFailed++;
|
||||
asmap->failedPDAs[asmap->numFailedPDAs] = pda;
|
||||
asmap->numFailedPDAs++;
|
||||
}
|
||||
pda = asmap->qInfo;
|
||||
if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
|
||||
asmap->numQFailed++;
|
||||
asmap->failedPDAs[asmap->numFailedPDAs] = pda;
|
||||
asmap->numFailedPDAs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* DuplicateASM -- duplicates an ASM and returns the new one
|
||||
*
|
||||
****************************************************************************************/
|
||||
RF_AccessStripeMap_t *rf_DuplicateASM(asmap)
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
{
|
||||
RF_AccessStripeMap_t *new_asm;
|
||||
RF_PhysDiskAddr_t *pda, *new_pda, *t_pda;
|
||||
|
||||
new_pda = NULL;
|
||||
new_asm = rf_AllocAccessStripeMapComponent();
|
||||
bcopy((char *)asmap, (char *)new_asm, sizeof(RF_AccessStripeMap_t));
|
||||
new_asm->numFailedPDAs = 0; /* ??? */
|
||||
new_asm->failedPDAs[0] = NULL;
|
||||
new_asm->physInfo = NULL;
|
||||
new_asm->parityInfo = NULL;
|
||||
new_asm->next = NULL;
|
||||
|
||||
for (pda = asmap->physInfo; pda; pda=pda->next) { /* copy the physInfo list */
|
||||
t_pda = rf_AllocPhysDiskAddr();
|
||||
bcopy((char *)pda, (char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
t_pda->next = NULL;
|
||||
if (!new_asm->physInfo) {new_asm->physInfo = t_pda; new_pda = t_pda;}
|
||||
else {new_pda->next = t_pda; new_pda = new_pda->next;}
|
||||
if (pda == asmap->failedPDAs[0])
|
||||
new_asm->failedPDAs[0] = t_pda;
|
||||
}
|
||||
for (pda = asmap->parityInfo; pda; pda=pda->next) { /* copy the parityInfo list */
|
||||
t_pda = rf_AllocPhysDiskAddr();
|
||||
bcopy((char *)pda, (char *)t_pda, sizeof(RF_PhysDiskAddr_t));
|
||||
t_pda->next = NULL;
|
||||
if (!new_asm->parityInfo) {new_asm->parityInfo = t_pda; new_pda = t_pda;}
|
||||
else {new_pda->next = t_pda; new_pda = new_pda->next;}
|
||||
if (pda == asmap->failedPDAs[0])
|
||||
new_asm->failedPDAs[0] = t_pda;
|
||||
}
|
||||
return(new_asm);
|
||||
}
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* DuplicatePDA -- duplicates a PDA and returns the new one
|
||||
*
|
||||
****************************************************************************************/
|
||||
RF_PhysDiskAddr_t *rf_DuplicatePDA(pda)
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
{
|
||||
RF_PhysDiskAddr_t *new;
|
||||
|
||||
new = rf_AllocPhysDiskAddr();
|
||||
bcopy((char *)pda, (char *)new, sizeof(RF_PhysDiskAddr_t));
|
||||
return(new);
|
||||
}
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* routines to allocate and free list elements. All allocation routines zero the
|
||||
* structure before returning it.
|
||||
*
|
||||
* FreePhysDiskAddr is static. It should never be called directly, because
|
||||
* FreeAccessStripeMap takes care of freeing the PhysDiskAddr list.
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
/*
 * The three free lists owned by this module, each followed by the sizing
 * constants handed to the RF_FREELIST_* macros in rf_ConfigureMapModule().
 */

/* access stripe map headers */
static RF_FreeList_t *rf_asmhdr_freelist;
#define RF_MAX_FREE_ASMHDR 128
#define RF_ASMHDR_INC       16
#define RF_ASMHDR_INITIAL   32

/* access stripe maps */
static RF_FreeList_t *rf_asm_freelist;
#define RF_MAX_FREE_ASM 192
#define RF_ASM_INC       24
#define RF_ASM_INITIAL   64

/* physical disk addresses */
static RF_FreeList_t *rf_pda_freelist;
#define RF_MAX_FREE_PDA 192
#define RF_PDA_INC       24
#define RF_PDA_INITIAL   64
|
||||
|
||||
/* called at shutdown time. So far, all that is necessary is to release all the free lists */
|
||||
static void rf_ShutdownMapModule(void *);
|
||||
static void rf_ShutdownMapModule(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
|
||||
RF_FREELIST_DESTROY(rf_pda_freelist,next,(RF_PhysDiskAddr_t *));
|
||||
RF_FREELIST_DESTROY(rf_asm_freelist,next,(RF_AccessStripeMap_t *));
|
||||
}
|
||||
|
||||
int rf_ConfigureMapModule(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
|
||||
RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
|
||||
if (rf_asmhdr_freelist == NULL) {
|
||||
return(ENOMEM);
|
||||
}
|
||||
RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
|
||||
RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
|
||||
if (rf_asm_freelist == NULL) {
|
||||
RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
|
||||
return(ENOMEM);
|
||||
}
|
||||
RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA,
|
||||
RF_PDA_INC, sizeof(RF_PhysDiskAddr_t));
|
||||
if (rf_pda_freelist == NULL) {
|
||||
RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
|
||||
RF_FREELIST_DESTROY(rf_pda_freelist,next,(RF_PhysDiskAddr_t *));
|
||||
return(ENOMEM);
|
||||
}
|
||||
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
rf_ShutdownMapModule(NULL);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL,next,
|
||||
(RF_AccessStripeMapHeader_t *));
|
||||
RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL,next,
|
||||
(RF_AccessStripeMap_t *));
|
||||
RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL,next,
|
||||
(RF_PhysDiskAddr_t *));
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader()
|
||||
{
|
||||
RF_AccessStripeMapHeader_t *p;
|
||||
|
||||
RF_FREELIST_GET(rf_asmhdr_freelist,p,next,(RF_AccessStripeMapHeader_t *));
|
||||
bzero((char *)p, sizeof(RF_AccessStripeMapHeader_t));
|
||||
|
||||
return(p);
|
||||
}
|
||||
|
||||
|
||||
void rf_FreeAccessStripeMapHeader(p)
|
||||
RF_AccessStripeMapHeader_t *p;
|
||||
{
|
||||
RF_FREELIST_FREE(rf_asmhdr_freelist,p,next);
|
||||
}
|
||||
|
||||
RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr()
|
||||
{
|
||||
RF_PhysDiskAddr_t *p;
|
||||
|
||||
RF_FREELIST_GET(rf_pda_freelist,p,next,(RF_PhysDiskAddr_t *));
|
||||
bzero((char *)p, sizeof(RF_PhysDiskAddr_t));
|
||||
|
||||
return(p);
|
||||
}
|
||||
|
||||
/* allocates a list of PDAs, locking the free list only once
|
||||
* when we have to call calloc, we do it one component at a time to simplify
|
||||
* the process of freeing the list at program shutdown. This should not be
|
||||
* much of a performance hit, because it should be very infrequently executed.
|
||||
*/
|
||||
RF_PhysDiskAddr_t *rf_AllocPDAList(count)
|
||||
int count;
|
||||
{
|
||||
RF_PhysDiskAddr_t *p = NULL;
|
||||
|
||||
RF_FREELIST_GET_N(rf_pda_freelist,p,next,(RF_PhysDiskAddr_t *),count);
|
||||
return(p);
|
||||
}
|
||||
|
||||
void rf_FreePhysDiskAddr(p)
|
||||
RF_PhysDiskAddr_t *p;
|
||||
{
|
||||
RF_FREELIST_FREE(rf_pda_freelist,p,next);
|
||||
}
|
||||
|
||||
static void rf_FreePDAList(l_start, l_end, count)
|
||||
RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end of list */
|
||||
int count; /* number of elements in list */
|
||||
{
|
||||
RF_FREELIST_FREE_N(rf_pda_freelist,l_start,next,(RF_PhysDiskAddr_t *),count);
|
||||
}
|
||||
|
||||
RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent()
|
||||
{
|
||||
RF_AccessStripeMap_t *p;
|
||||
|
||||
RF_FREELIST_GET(rf_asm_freelist,p,next,(RF_AccessStripeMap_t *));
|
||||
bzero((char *)p, sizeof(RF_AccessStripeMap_t));
|
||||
|
||||
return(p);
|
||||
}
|
||||
|
||||
/* this is essentially identical to AllocPDAList. I should combine the two.
|
||||
* when we have to call calloc, we do it one component at a time to simplify
|
||||
* the process of freeing the list at program shutdown. This should not be
|
||||
* much of a performance hit, because it should be very infrequently executed.
|
||||
*/
|
||||
RF_AccessStripeMap_t *rf_AllocASMList(count)
|
||||
int count;
|
||||
{
|
||||
RF_AccessStripeMap_t *p = NULL;
|
||||
|
||||
RF_FREELIST_GET_N(rf_asm_freelist,p,next,(RF_AccessStripeMap_t *),count);
|
||||
return(p);
|
||||
}
|
||||
|
||||
void rf_FreeAccessStripeMapComponent(p)
|
||||
RF_AccessStripeMap_t *p;
|
||||
{
|
||||
RF_FREELIST_FREE(rf_asm_freelist,p,next);
|
||||
}
|
||||
|
||||
static void rf_FreeASMList(l_start, l_end, count)
|
||||
RF_AccessStripeMap_t *l_start, *l_end;
|
||||
int count;
|
||||
{
|
||||
RF_FREELIST_FREE_N(rf_asm_freelist,l_start,next,(RF_AccessStripeMap_t *),count);
|
||||
}
|
||||
|
||||
void rf_FreeAccessStripeMap(hdr)
|
||||
RF_AccessStripeMapHeader_t *hdr;
|
||||
{
|
||||
RF_AccessStripeMap_t *p, *pt = NULL;
|
||||
RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL;
|
||||
int count = 0, t, asm_count = 0;
|
||||
|
||||
for (p = hdr->stripeMap; p; p=p->next) {
|
||||
|
||||
/* link the 3 pda lists into the accumulating pda list */
|
||||
|
||||
if (!pdaList) pdaList = p->qInfo; else pdaEnd->next = p->qInfo;
|
||||
for (trailer=NULL,pdp=p->qInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;}
|
||||
if (trailer) pdaEnd = trailer;
|
||||
|
||||
if (!pdaList) pdaList = p->parityInfo; else pdaEnd->next = p->parityInfo;
|
||||
for (trailer=NULL,pdp=p->parityInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;}
|
||||
if (trailer) pdaEnd = trailer;
|
||||
|
||||
if (!pdaList) pdaList = p->physInfo; else pdaEnd->next = p->physInfo;
|
||||
for (trailer=NULL,pdp=p->physInfo; pdp; ) {trailer = pdp; pdp=pdp->next; count++;}
|
||||
if (trailer) pdaEnd = trailer;
|
||||
|
||||
pt = p;
|
||||
asm_count++;
|
||||
}
|
||||
|
||||
/* debug only */
|
||||
for (t=0,pdp=pdaList; pdp; pdp=pdp->next)
|
||||
t++;
|
||||
RF_ASSERT(t == count);
|
||||
|
||||
if (pdaList)
|
||||
rf_FreePDAList(pdaList, pdaEnd, count);
|
||||
rf_FreeASMList(hdr->stripeMap, pt, asm_count);
|
||||
rf_FreeAccessStripeMapHeader(hdr);
|
||||
}
|
||||
|
||||
/* We can't use the large write optimization if there are any failures in the stripe.
|
||||
* In the declustered layout, there is no way to immediately determine what disks
|
||||
* constitute a stripe, so we actually have to hunt through the stripe looking for failures.
|
||||
* The reason we map the parity instead of just using asm->parityInfo->col is because
|
||||
* the latter may have been already redirected to a spare drive, which would
|
||||
* mess up the computation of the stripe offset.
|
||||
*
|
||||
* ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
|
||||
*/
|
||||
int rf_CheckStripeForFailures(raidPtr, asmap)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
{
|
||||
RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i;
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_StripeCount_t stripeOffset;
|
||||
int numFailures;
|
||||
RF_RaidAddr_t sosAddr;
|
||||
RF_SectorNum_t diskOffset, poffset;
|
||||
RF_RowCol_t testrow;
|
||||
|
||||
/* quick out in the fault-free case. */
|
||||
RF_LOCK_MUTEX(raidPtr->mutex);
|
||||
numFailures = raidPtr->numFailures;
|
||||
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
||||
if (numFailures == 0) return(0);
|
||||
|
||||
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
||||
row = asmap->physInfo->row;
|
||||
(layoutPtr->map->IdentifyStripe)(raidPtr, asmap->raidAddress, &diskids, &testrow);
|
||||
(layoutPtr->map->MapParity)(raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */
|
||||
|
||||
/* this need not be true if we've redirected the access to a spare in another row
|
||||
RF_ASSERT(row == testrow);
|
||||
*/
|
||||
stripeOffset = 0;
|
||||
for (i=0; i<layoutPtr->numDataCol+layoutPtr->numParityCol; i++) {
|
||||
if (diskids[i] != pcol) {
|
||||
if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) {
|
||||
if (raidPtr->status[testrow] != rf_rs_reconstructing)
|
||||
return(1);
|
||||
RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]);
|
||||
layoutPtr->map->MapSector(raidPtr,
|
||||
sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit,
|
||||
&trow, &tcol, &diskOffset, 0);
|
||||
RF_ASSERT( (trow == testrow) && (tcol == diskids[i]) );
|
||||
if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset))
|
||||
return(1);
|
||||
asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
|
||||
return(0);
|
||||
}
|
||||
stripeOffset++;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
|
||||
return the number of failed data units in the stripe.
|
||||
*/
|
||||
|
||||
int rf_NumFailedDataUnitsInStripe(raidPtr, asmap)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_AccessStripeMap_t *asmap;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_RowCol_t trow, tcol, row, i;
|
||||
RF_SectorNum_t diskOffset;
|
||||
RF_RaidAddr_t sosAddr;
|
||||
int numFailures;
|
||||
|
||||
/* quick out in the fault-free case. */
|
||||
RF_LOCK_MUTEX(raidPtr->mutex);
|
||||
numFailures = raidPtr->numFailures;
|
||||
RF_UNLOCK_MUTEX(raidPtr->mutex);
|
||||
if (numFailures == 0) return(0);
|
||||
numFailures = 0;
|
||||
|
||||
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
|
||||
row = asmap->physInfo->row;
|
||||
for (i=0; i<layoutPtr->numDataCol; i++)
|
||||
{
|
||||
(layoutPtr->map->MapSector)(raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit,
|
||||
&trow, &tcol, &diskOffset, 0);
|
||||
if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status))
|
||||
numFailures++;
|
||||
}
|
||||
|
||||
return numFailures;
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************************
|
||||
*
|
||||
* debug routines
|
||||
*
|
||||
****************************************************************************************/
|
||||
|
||||
void rf_PrintAccessStripeMap(asm_h)
|
||||
RF_AccessStripeMapHeader_t *asm_h;
|
||||
{
|
||||
rf_PrintFullAccessStripeMap(asm_h, 0);
|
||||
}
|
||||
|
||||
void rf_PrintFullAccessStripeMap(asm_h, prbuf)
|
||||
RF_AccessStripeMapHeader_t *asm_h;
|
||||
int prbuf; /* flag to print buffer pointers */
|
||||
{
|
||||
int i;
|
||||
RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
|
||||
RF_PhysDiskAddr_t *p;
|
||||
printf("%d stripes total\n", (int)asm_h->numStripes);
|
||||
for (; asmap; asmap = asmap->next) {
|
||||
/* printf("Num failures: %d\n",asmap->numDataFailed); */
|
||||
/* printf("Num sectors: %d\n",(int)asmap->totalSectorsAccessed); */
|
||||
printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
|
||||
(int) asmap->stripeID,
|
||||
(int) asmap->totalSectorsAccessed,
|
||||
(int) asmap->numDataFailed,
|
||||
(int) asmap->numParityFailed);
|
||||
if (asmap->parityInfo) {
|
||||
printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col,
|
||||
(int)asmap->parityInfo->startSector,
|
||||
(int)(asmap->parityInfo->startSector +
|
||||
asmap->parityInfo->numSector - 1));
|
||||
if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->bufPtr);
|
||||
if (asmap->parityInfo->next) {
|
||||
printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row,
|
||||
asmap->parityInfo->next->col,
|
||||
(int) asmap->parityInfo->next->startSector,
|
||||
(int)(asmap->parityInfo->next->startSector +
|
||||
asmap->parityInfo->next->numSector - 1));
|
||||
if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->next->bufPtr);
|
||||
RF_ASSERT(asmap->parityInfo->next->next == NULL);
|
||||
}
|
||||
printf("]\n\t");
|
||||
}
|
||||
for (i=0,p=asmap->physInfo; p; p=p->next,i++) {
|
||||
printf("SU r%d c%d s%d-%d ", p->row, p->col, (int)p->startSector,
|
||||
(int)(p->startSector + p->numSector - 1));
|
||||
if (prbuf) printf("b0x%lx ", (unsigned long) p->bufPtr);
|
||||
if (i && !(i&1)) printf("\n\t");
|
||||
}
|
||||
printf("\n");
|
||||
p = asm_h->stripeMap->failedPDAs[0];
|
||||
if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) printf("[multiple failures]\n");
|
||||
else if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0)
|
||||
printf("\t[Failed PDA: r%d c%d s%d-%d]\n",p->row, p->col,
|
||||
(int)p->startSector, (int)(p->startSector + p->numSector-1));
|
||||
}
|
||||
}
|
||||
|
||||
void rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks)
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_RaidAddr_t raidAddr;
|
||||
RF_SectorCount_t numBlocks;
|
||||
{
|
||||
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
|
||||
RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
|
||||
|
||||
printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t");
|
||||
for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) {
|
||||
printf("%d (0x%x), ",(int)ra, (int)ra);
|
||||
}
|
||||
printf("\n");
|
||||
printf("Offset into stripe unit: %d (0x%x)\n",
|
||||
(int)(raidAddr % layoutPtr->sectorsPerStripeUnit),
|
||||
(int)(raidAddr % layoutPtr->sectorsPerStripeUnit));
|
||||
}
|
||||
|
||||
/*
|
||||
given a parity descriptor and the starting address within a stripe,
|
||||
range restrict the parity descriptor to touch only the correct stuff.
|
||||
*/
|
||||
void rf_ASMParityAdjust(
|
||||
RF_PhysDiskAddr_t *toAdjust,
|
||||
RF_StripeNum_t startAddrWithinStripe,
|
||||
RF_SectorNum_t endAddress,
|
||||
RF_RaidLayout_t *layoutPtr,
|
||||
RF_AccessStripeMap_t *asm_p)
|
||||
{
|
||||
RF_PhysDiskAddr_t *new_pda;
|
||||
|
||||
/* when we're accessing only a portion of one stripe unit, we want the parity descriptor
|
||||
* to identify only the chunk of parity associated with the data. When the access spans
|
||||
* exactly one stripe unit boundary and is less than a stripe unit in size, it uses two disjoint
|
||||
* regions of the parity unit. When an access spans more than one stripe unit boundary, it
|
||||
* uses all of the parity unit.
|
||||
*
|
||||
* To better handle the case where stripe units are small, we may eventually want to change
|
||||
* the 2nd case so that if the SU size is below some threshold, we just read/write the whole
|
||||
* thing instead of breaking it up into two accesses.
|
||||
*/
|
||||
if (asm_p->numStripeUnitsAccessed == 1)
|
||||
{
|
||||
int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
|
||||
toAdjust->startSector += x;
|
||||
toAdjust->raidAddress += x;
|
||||
toAdjust->numSector = asm_p->physInfo->numSector;
|
||||
RF_ASSERT(toAdjust->numSector != 0);
|
||||
}
|
||||
else
|
||||
if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit)
|
||||
{
|
||||
int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
|
||||
|
||||
/* create a second pda and copy the parity map info into it */
|
||||
RF_ASSERT(toAdjust->next == NULL);
|
||||
new_pda = toAdjust->next = rf_AllocPhysDiskAddr();
|
||||
*new_pda = *toAdjust; /* structure assignment */
|
||||
new_pda->next = NULL;
|
||||
|
||||
/* adjust the start sector & number of blocks for the first parity pda */
|
||||
toAdjust->startSector += x;
|
||||
toAdjust->raidAddress += x;
|
||||
toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe;
|
||||
RF_ASSERT(toAdjust->numSector != 0);
|
||||
|
||||
/* adjust the second pda */
|
||||
new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress);
|
||||
/*new_pda->raidAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, toAdjust->raidAddress);*/
|
||||
RF_ASSERT(new_pda->numSector != 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Check if a disk has been spared or failed. If spared,
|
||||
redirect the I/O.
|
||||
If it has been failed, record it in the asm pointer.
|
||||
Fourth arg is whether data or parity.
|
||||
*/
|
||||
void rf_ASMCheckStatus(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_PhysDiskAddr_t *pda_p,
|
||||
RF_AccessStripeMap_t *asm_p,
|
||||
RF_RaidDisk_t **disks,
|
||||
int parity)
|
||||
{
|
||||
RF_DiskStatus_t dstatus;
|
||||
RF_RowCol_t frow, fcol;
|
||||
|
||||
dstatus = disks[pda_p->row][pda_p->col].status;
|
||||
|
||||
if (dstatus == rf_ds_spared) {
|
||||
/* if the disk has been spared, redirect access to the spare */
|
||||
frow = pda_p->row; fcol = pda_p->col;
|
||||
pda_p->row = disks[frow][fcol].spareRow;
|
||||
pda_p->col = disks[frow][fcol].spareCol;
|
||||
}
|
||||
else if (dstatus == rf_ds_dist_spared) {
|
||||
/* ditto if disk has been spared to dist spare space */
|
||||
RF_RowCol_t or = pda_p->row, oc=pda_p->col;
|
||||
RF_SectorNum_t oo = pda_p->startSector;
|
||||
|
||||
if (pda_p -> type == RF_PDA_TYPE_DATA)
|
||||
raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);
|
||||
else
|
||||
raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);
|
||||
|
||||
if (rf_mapDebug) {
|
||||
printf("Redirected r %d c %d o %d -> r%d c %d o %d\n",or,oc,(int)oo,
|
||||
pda_p->row,pda_p->col,(int)pda_p->startSector);
|
||||
}
|
||||
} else if (RF_DEAD_DISK(dstatus)) {
|
||||
/* if the disk is inaccessible, mark the failure */
|
||||
if (parity)
|
||||
asm_p->numParityFailed++;
|
||||
else {
|
||||
asm_p->numDataFailed++;
|
||||
#if 0
|
||||
/* XXX Do we really want this spewing out on the console? GO */
|
||||
printf("DATA_FAILED!\n");
|
||||
#endif
|
||||
}
|
||||
asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
|
||||
asm_p->numFailedPDAs++;
|
||||
#if 0
|
||||
switch (asm_p->numParityFailed + asm_p->numDataFailed)
|
||||
{
|
||||
case 1:
|
||||
asm_p->failedPDAs[0] = pda_p;
|
||||
break;
|
||||
case 2:
|
||||
asm_p->failedPDAs[1] = pda_p;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* the redirected access should never span a stripe unit boundary */
|
||||
RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress) ==
|
||||
rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress + pda_p->numSector -1));
|
||||
RF_ASSERT(pda_p->col != -1);
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
/* $NetBSD: rf_map.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_map.h */
|
||||
|
||||
/* :
|
||||
* Log: rf_map.h,v
|
||||
* Revision 1.9 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.8 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.7 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.6 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.5 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.4 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1995/12/01 19:25:14 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_MAP_H_
#define _RF__RF_MAP_H_

#include "rf_types.h"
#include "rf_alloclist.h"
#include "rf_raid.h"

/* mapping structure allocation and free routines */

RF_AccessStripeMapHeader_t *rf_MapAccess(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddress,
    RF_SectorCount_t numBlocks, caddr_t buffer, int remap);
void rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h);
RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t *asmap);
RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t *pda);

/* module configuration */
int rf_ConfigureMapModule(RF_ShutdownList_t **listp);

/* access stripe map headers */
RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void);
void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p);

/* physical disk address descriptors, singly and as lists */
RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void);
RF_PhysDiskAddr_t *rf_AllocPDAList(int count);
void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p);

/* access stripe map components, singly and as lists */
RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void);
RF_AccessStripeMap_t *rf_AllocASMList(int count);
void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p);

/* releases the header, its stripe map components and all of their PDAs */
void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr);

/* failure queries on a stripe */
int rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
int rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);

/* debug printing */
void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h);
void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf);
void rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
    RF_SectorCount_t numBlocks);

/* helpers that adjust or check a PDA within an access stripe map */
void rf_ASMParityAdjust(RF_PhysDiskAddr_t *toAdjust, RF_StripeNum_t startAddrWithinStripe,
    RF_SectorNum_t endAddress, RF_RaidLayout_t *layoutPtr, RF_AccessStripeMap_t *asm_p);
void rf_ASMCheckStatus(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_p,
    RF_AccessStripeMap_t *asm_p, RF_RaidDisk_t **disks, int parity);

#endif /* !_RF__RF_MAP_H_ */
|
|
@ -0,0 +1,197 @@
|
|||
/* $NetBSD: rf_mcpair.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_mcpair.c
|
||||
* an mcpair is a structure containing a mutex and a condition variable.
|
||||
* it's used to block the current thread until some event occurs.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_mcpair.c,v
|
||||
* Revision 1.16 1996/06/19 22:23:01 jimz
|
||||
* parity verification is now a layout-configurable thing
|
||||
* not all layouts currently support it (correctly, anyway)
|
||||
*
|
||||
* Revision 1.15 1996/06/17 03:18:04 jimz
|
||||
* include shutdown.h for macroized ShutdownCreate
|
||||
*
|
||||
* Revision 1.14 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.13 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.12 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.11 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.10 1996/05/20 16:15:22 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.9 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.8 1996/05/16 16:04:42 jimz
|
||||
* convert to return-val on FREELIST init
|
||||
*
|
||||
* Revision 1.7 1996/05/16 14:47:21 jimz
|
||||
* rewrote to use RF_FREELIST
|
||||
*
|
||||
* Revision 1.6 1995/12/01 19:25:43 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_mcpair.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_freelist.h"
|
||||
#include "rf_shutdown.h"
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
#include <sys/proc.h>
|
||||
|
||||
#endif
|
||||
|
||||
static RF_FreeList_t *rf_mcpair_freelist;
|
||||
|
||||
#define RF_MAX_FREE_MCPAIR 128
|
||||
#define RF_MCPAIR_INC 16
|
||||
#define RF_MCPAIR_INITIAL 24
|
||||
|
||||
static int init_mcpair(RF_MCPair_t *);
|
||||
static void clean_mcpair(RF_MCPair_t *);
|
||||
static void rf_ShutdownMCPair(void *);
|
||||
|
||||
|
||||
|
||||
static int init_mcpair(t)
|
||||
RF_MCPair_t *t;
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = rf_mutex_init(&t->mutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
return(rc);
|
||||
}
|
||||
rc = rf_cond_init(&t->cond);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
rf_mutex_destroy(&t->mutex);
|
||||
return(rc);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
static void clean_mcpair(t)
|
||||
RF_MCPair_t *t;
|
||||
{
|
||||
rf_mutex_destroy(&t->mutex);
|
||||
rf_cond_destroy(&t->cond);
|
||||
}
|
||||
|
||||
static void rf_ShutdownMCPair(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist,next,(RF_MCPair_t *),clean_mcpair);
|
||||
}
|
||||
|
||||
int rf_ConfigureMCPair(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR,
|
||||
RF_MCPAIR_INC, sizeof(RF_MCPair_t));
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
|
||||
__FILE__, __LINE__, rc);
|
||||
rf_ShutdownMCPair(NULL);
|
||||
return(rc);
|
||||
}
|
||||
RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL,next,
|
||||
(RF_MCPair_t *),init_mcpair);
|
||||
return(0);
|
||||
}
|
||||
|
||||
RF_MCPair_t *rf_AllocMCPair()
|
||||
{
|
||||
RF_MCPair_t *t;
|
||||
|
||||
RF_FREELIST_GET_INIT(rf_mcpair_freelist,t,next,(RF_MCPair_t *),init_mcpair);
|
||||
if (t) {
|
||||
t->flag = 0;
|
||||
t->next = NULL;
|
||||
}
|
||||
return(t);
|
||||
}
|
||||
|
||||
void rf_FreeMCPair(t)
|
||||
RF_MCPair_t *t;
|
||||
{
|
||||
RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist,t,next,clean_mcpair);
|
||||
}
|
||||
|
||||
/* the callback function used to wake you up when you use an mcpair to wait for something */
|
||||
void rf_MCPairWakeupFunc(mcpair)
|
||||
RF_MCPair_t *mcpair;
|
||||
{
|
||||
RF_LOCK_MUTEX(mcpair->mutex);
|
||||
mcpair->flag = 1;
|
||||
#if 0
|
||||
printf("MCPairWakeupFunc called!\n");
|
||||
#endif
|
||||
#ifdef KERNEL
|
||||
wakeup(&(mcpair->flag)); /* XXX Does this do anything useful!! GO */
|
||||
/* XXX Looks like the following is needed to truly get the
|
||||
functionality they were looking for here... This could be a side-effect
|
||||
of my using a tsleep in the NetBSD port though... XXX */
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
wakeup(&(mcpair->cond)); /* XXX XXX XXX GO */
|
||||
#endif
|
||||
#else /* KERNEL */
|
||||
RF_SIGNAL_COND(mcpair->cond);
|
||||
#endif /* KERNEL */
|
||||
RF_UNLOCK_MUTEX(mcpair->mutex);
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* $NetBSD: rf_mcpair.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* rf_mcpair.h
|
||||
* see comments in rf_mcpair.c
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_MCPAIR_H_
#define _RF__RF_MCPAIR_H_

#include "rf_types.h"
#include "rf_threadstuff.h"

/*
 * A mutex/condition-variable pair used to block a thread until some event
 * occurs.  rf_AllocMCPair() hands a pair out with flag == 0 and
 * rf_MCPairWakeupFunc() sets flag to 1 when the event has happened.
 */
struct RF_MCPair_s {
	RF_DECLARE_MUTEX(mutex)
	RF_DECLARE_COND(cond)
	int flag;		/* 0 on allocation, set to 1 by the wakeup function */
	RF_MCPair_t *next;	/* link field handed to the freelist macros */
};

/*
 * RF_WAIT_MCPAIR(_mcp): block on an mcpair.
 *
 *  - non-kernel builds wait on the pair's condition variable with its
 *    mutex;
 *  - NetBSD kernel builds tsleep() on the address of the flag (the pair's
 *    mutex is not handed to tsleep);
 *  - other kernel builds mpsleep() on the address of the flag, passing the
 *    pair's mutex.
 */
#if !defined(KERNEL)
#define RF_WAIT_MCPAIR(_mcp) RF_WAIT_COND((_mcp)->cond, (_mcp)->mutex)
#elif defined(__NetBSD__)
#define RF_WAIT_MCPAIR(_mcp) tsleep(&((_mcp)->flag), PRIBIO | PCATCH, "mcpair", 0)
#else
#define RF_WAIT_MCPAIR(_mcp) mpsleep(&((_mcp)->flag), PZERO, "mcpair", 0, (void *) simple_lock_addr((_mcp)->mutex), MS_LOCK_SIMPLE)
#endif /* KERNEL */

int rf_ConfigureMCPair(RF_ShutdownList_t **listp);
RF_MCPair_t *rf_AllocMCPair(void);
void rf_FreeMCPair(RF_MCPair_t *t);
void rf_MCPairWakeupFunc(RF_MCPair_t *t);

#endif /* !_RF__RF_MCPAIR_H_ */
|
|
@ -0,0 +1,255 @@
|
|||
/* $NetBSD: rf_memchunk.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*********************************************************************************
|
||||
* rf_memchunk.c
|
||||
*
|
||||
* experimental code. I've found that the malloc and free calls in the DAG
|
||||
* creation code are very expensive. Since for any given workload the DAGs
|
||||
* created for different accesses are likely to be similar to each other, the
|
||||
* amount of memory used for any given DAG data structure is likely to be one
|
||||
* of a small number of values. For example, in UNIX, all reads and writes will
|
||||
* be less than 8k and will not span stripe unit boundaries. Thus in the absence
|
||||
* of failure, the only DAGs that will ever get created are single-node reads
|
||||
* and single-stripe-unit atomic read-modify-writes. So, I'm very likely to
|
||||
* be continually asking for chunks of memory equal to the sizes of these two
|
||||
* DAGs.
|
||||
*
|
||||
* This leads to the idea of holding on to these chunks of memory when the DAG is
|
||||
* freed and then, when a new DAG is created, trying to find such a chunk before
|
||||
* calling malloc.
|
||||
*
|
||||
* the "chunk list" is a list of lists. Each header node contains a size value
|
||||
* and a pointer to a list of chunk descriptors, each of which holds a pointer
|
||||
* to a chunk of memory of the indicated size.
|
||||
*
|
||||
* There is currently no way to purge memory out of the chunk list. My
|
||||
* initial thought on this is to have a low-priority thread that wakes up every
|
||||
* 1 or 2 seconds, purges all the chunks with low reuse counts, and sets all
|
||||
* the reuse counts to zero.
|
||||
*
|
||||
* This whole idea may be bad, since malloc may be able to do this more efficiently.
|
||||
* It's worth a try, though, and it can be turned off by setting useMemChunks to 0.
|
||||
*
|
||||
********************************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_memchunk.c,v
|
||||
* Revision 1.17 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.16 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.15 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.14 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.13 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.12 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.11 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.10 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.9 1996/05/20 16:15:45 jimz
|
||||
* switch to rf_{mutex,cond}_{init,destroy}
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1995/12/01 19:26:07 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_memchunk.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_options.h"
|
||||
#include "rf_shutdown.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
typedef struct RF_ChunkHdr_s RF_ChunkHdr_t;
|
||||
struct RF_ChunkHdr_s {
|
||||
int size;
|
||||
RF_ChunkDesc_t *list;
|
||||
RF_ChunkHdr_t *next;
|
||||
};
|
||||
|
||||
static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list;
|
||||
static RF_ChunkDesc_t *chunk_desc_free_list;
|
||||
RF_DECLARE_STATIC_MUTEX(chunkmutex)
|
||||
|
||||
static void rf_ShutdownMemChunk(void *);
|
||||
static RF_ChunkDesc_t *NewMemChunk(int, char *);
|
||||
|
||||
|
||||
static void rf_ShutdownMemChunk(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
RF_ChunkDesc_t *pt, *p;
|
||||
RF_ChunkHdr_t *hdr, *ht;
|
||||
|
||||
if (rf_memChunkDebug)
|
||||
printf("Chunklist:\n");
|
||||
for (hdr = chunklist; hdr;) {
|
||||
for (p = hdr->list; p; ) {
|
||||
if (rf_memChunkDebug)
|
||||
printf("Size %d reuse count %d\n",p->size, p->reuse_count);
|
||||
pt = p; p=p->next;
|
||||
RF_Free(pt->buf, pt->size);
|
||||
RF_Free(pt, sizeof(*pt));
|
||||
}
|
||||
ht = hdr; hdr=hdr->next;
|
||||
RF_Free(ht, sizeof(*ht));
|
||||
}
|
||||
|
||||
rf_mutex_destroy(&chunkmutex);
|
||||
}
|
||||
|
||||
int rf_ConfigureMemChunk(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int rc;
|
||||
|
||||
chunklist = NULL;
|
||||
chunk_hdr_free_list = NULL;
|
||||
chunk_desc_free_list = NULL;
|
||||
rc = rf_mutex_init(&chunkmutex);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
}
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL);
|
||||
if (rc) {
|
||||
RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
|
||||
__LINE__, rc);
|
||||
rf_mutex_destroy(&chunkmutex);
|
||||
}
|
||||
return(rc);
|
||||
}
|
||||
|
||||
/* called to get a chunk descriptor for a newly-allocated chunk of memory
|
||||
* MUTEX MUST BE LOCKED
|
||||
*
|
||||
* free list is not currently used
|
||||
*/
|
||||
static RF_ChunkDesc_t *NewMemChunk(size, buf)
|
||||
int size;
|
||||
char *buf;
|
||||
{
|
||||
RF_ChunkDesc_t *p;
|
||||
|
||||
if (chunk_desc_free_list) {p = chunk_desc_free_list; chunk_desc_free_list = p->next;}
|
||||
else RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *));
|
||||
p->size = size;
|
||||
p->buf = buf;
|
||||
p->next = NULL;
|
||||
p->reuse_count = 0;
|
||||
return(p);
|
||||
}
|
||||
|
||||
/* looks for a chunk of memory of acceptable size. If none, allocates one and returns
|
||||
* a chunk descriptor for it, but does not install anything in the list. This is done
|
||||
* when the chunk is released.
|
||||
*/
|
||||
RF_ChunkDesc_t *rf_GetMemChunk(size)
|
||||
int size;
|
||||
{
|
||||
RF_ChunkHdr_t *hdr = chunklist;
|
||||
RF_ChunkDesc_t *p = NULL;
|
||||
char *buf;
|
||||
|
||||
RF_LOCK_MUTEX(chunkmutex);
|
||||
for (hdr = chunklist; hdr; hdr = hdr->next) if (hdr->size >= size) {
|
||||
p = hdr->list;
|
||||
if (p) {
|
||||
hdr->list = p->next;
|
||||
p->next = NULL;
|
||||
p->reuse_count++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!p) {
|
||||
RF_Malloc(buf, size, (char *));
|
||||
p = NewMemChunk(size, buf);
|
||||
}
|
||||
RF_UNLOCK_MUTEX(chunkmutex);
|
||||
(void) bzero(p->buf, size);
|
||||
return(p);
|
||||
}
|
||||
|
||||
void rf_ReleaseMemChunk(chunk)
|
||||
RF_ChunkDesc_t *chunk;
|
||||
{
|
||||
RF_ChunkHdr_t *hdr, *ht = NULL, *new;
|
||||
|
||||
RF_LOCK_MUTEX(chunkmutex);
|
||||
for (hdr = chunklist; hdr && hdr->size < chunk->size; ht=hdr,hdr=hdr->next);
|
||||
if (hdr && hdr->size == chunk->size) {
|
||||
chunk->next = hdr->list;
|
||||
hdr->list = chunk;
|
||||
}
|
||||
else {
|
||||
RF_Malloc(new, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *));
|
||||
new->size = chunk->size; new->list = chunk; chunk->next = NULL;
|
||||
if (ht) {
|
||||
new->next = ht->next;
|
||||
ht->next = new;
|
||||
}
|
||||
else {
|
||||
new->next = hdr;
|
||||
chunklist = new;
|
||||
}
|
||||
}
|
||||
RF_UNLOCK_MUTEX(chunkmutex);
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/* $NetBSD: rf_memchunk.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* header file for rf_memchunk.c. See comments there */
|
||||
|
||||
/* :
|
||||
* Log: rf_memchunk.h,v
|
||||
* Revision 1.8 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.7 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.6 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.5 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.4 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1995/12/01 19:25:56 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_MEMCHUNK_H_
|
||||
#define _RF__RF_MEMCHUNK_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
struct RF_ChunkDesc_s {
|
||||
int size;
|
||||
int reuse_count;
|
||||
char *buf;
|
||||
RF_ChunkDesc_t *next;
|
||||
};
|
||||
|
||||
int rf_ConfigureMemChunk(RF_ShutdownList_t **listp);
|
||||
RF_ChunkDesc_t *rf_GetMemChunk(int size);
|
||||
void rf_ReleaseMemChunk(RF_ChunkDesc_t *chunk);
|
||||
|
||||
#endif /* !_RF__RF_MEMCHUNK_H_ */
|
|
@ -0,0 +1,97 @@
|
|||
/* $NetBSD: rf_netbsd.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*-
|
||||
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Greg Oster
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Jason R. Thorpe.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _RF__RF_NETBSDSTUFF_H_
|
||||
#define _RF__RF_NETBSDSTUFF_H_
|
||||
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
|
||||
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
/*
 * Per-component information: the vnode and device number of one device.
 * The size and path fields are compiled out with #if 0.
 */
struct raidcinfo {
	struct vnode	*ci_vp;		/* device's vnode */
	dev_t		 ci_dev;	/* XXX: device's dev_t */
#if 0
	size_t		 ci_size;	/* size */
	char		*ci_path;	/* path to component */
	size_t		 ci_pathlen;	/* length of component path */
#endif
};
#endif /* __NetBSD__ && _KERNEL */
|
||||
|
||||
#endif /* _RF__RF_NETBSDSTUFF_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,453 @@
|
|||
/* $NetBSD: rf_nwayxor.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland, Daniel Stodolsky
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/************************************************************
|
||||
*
|
||||
* nwayxor.c -- code to do N-way xors for reconstruction
|
||||
*
|
||||
* nWayXorN xors N input buffers into the destination buffer.
|
||||
* adapted from danner's longword_bxor code.
|
||||
*
|
||||
************************************************************/
|
||||
|
||||
/* :
|
||||
* Log: rf_nwayxor.c,v
|
||||
* Revision 1.6 1996/06/12 03:31:18 jimz
|
||||
* only print call counts if rf_showXorCallCounts != 0
|
||||
*
|
||||
* Revision 1.5 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.4 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.3 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.2 1995/12/01 19:29:14 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_nwayxor.h"
|
||||
#include "rf_shutdown.h"
|
||||
|
||||
static int callcount[10];
|
||||
static void rf_ShutdownNWayXor(void *);
|
||||
|
||||
static void rf_ShutdownNWayXor(ignored)
|
||||
void *ignored;
|
||||
{
|
||||
int i;
|
||||
|
||||
if (rf_showXorCallCounts == 0)
|
||||
return;
|
||||
printf("Call counts for n-way xor routines: ");
|
||||
for (i=0; i<10; i++)
|
||||
printf("%d ",callcount[i]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int rf_ConfigureNWayXor(listp)
|
||||
RF_ShutdownList_t **listp;
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
callcount[i] = 0;
|
||||
rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
void rf_nWayXor1(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *dest= (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *end = src+len;
|
||||
register unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
|
||||
|
||||
callcount[1]++;
|
||||
while (len >= 4 )
|
||||
{
|
||||
d0 = dest[0];
|
||||
d1 = dest[1];
|
||||
d2 = dest[2];
|
||||
d3 = dest[3];
|
||||
s0 = src[0];
|
||||
s1 = src[1];
|
||||
s2 = src[2];
|
||||
s3 = src[3];
|
||||
dest[0] = d0 ^ s0;
|
||||
dest[1] = d1 ^ s1;
|
||||
dest[2] = d2 ^ s2;
|
||||
dest[3] = d3 ^ s3;
|
||||
src += 4;
|
||||
dest += 4;
|
||||
len -= 4;
|
||||
}
|
||||
while (src < end) {*dest++ ^= *src++;}
|
||||
}
|
||||
|
||||
void rf_nWayXor2(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *a = dst;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[2]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f))
|
||||
{
|
||||
*dst++ = *a++ ^ *b++ ^ *c++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 )
|
||||
{
|
||||
a0 = a[0]; len -= 4;
|
||||
|
||||
a1 = a[1];
|
||||
a2 = a[2];
|
||||
|
||||
a3 = a[3]; a += 4;
|
||||
|
||||
b0 = b[0];
|
||||
b1 = b[1];
|
||||
|
||||
b2 = b[2];
|
||||
b3 = b[3];
|
||||
/* start dual issue */
|
||||
a0 ^= b0; b0 = c[0];
|
||||
|
||||
b += 4; a1 ^= b1;
|
||||
|
||||
a2 ^= b2; a3 ^= b3;
|
||||
|
||||
b1 = c[1]; a0 ^= b0;
|
||||
|
||||
b2 = c[2]; a1 ^= b1;
|
||||
|
||||
b3 = c[3]; a2 ^= b2;
|
||||
|
||||
dst[0] = a0; a3 ^= b3;
|
||||
dst[1] = a1; c += 4;
|
||||
dst[2] = a2;
|
||||
dst[3] = a3; dst += 4;
|
||||
}
|
||||
while (len)
|
||||
{
|
||||
*dst++ = *a++ ^ *b++ ^ *c++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Helpers for the four-words-at-a-time loops of rf_nWayXor3..9.
 *
 * They operate on variables that must exist in the calling scope:
 * unsigned long a0..a3 (accumulators), b0..b3 (operands just loaded) and
 * int len (words remaining).
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement, and every argument is parenthesized.  Invoke them with a
 * trailing semicolon.
 */

/*
 * Load four destination words into a0..a3 and four words of the first
 * source into b0..b3, and subtract 4 from len.
 * Note that the first arg is not incremented but the 2nd arg is.
 */
#define LOAD_FIRST(_dst,_b) \
	do { \
		a0 = (_dst)[0]; len -= 4; \
		a1 = (_dst)[1]; \
		a2 = (_dst)[2]; \
		a3 = (_dst)[3]; \
		b0 = (_b)[0]; \
		b1 = (_b)[1]; \
		b2 = (_b)[2]; \
		b3 = (_b)[3]; (_b) += 4; \
	} while (0)

/*
 * Fold b0..b3 into a0..a3, then load the next four words from _n into
 * b0..b3.  Note: the arg is incremented.
 */
#define XOR_AND_LOAD_NEXT(_n) \
	do { \
		a0 ^= b0; b0 = (_n)[0]; \
		a1 ^= b1; b1 = (_n)[1]; \
		a2 ^= b2; b2 = (_n)[2]; \
		a3 ^= b3; b3 = (_n)[3]; \
		(_n) += 4; \
	} while (0)

/*
 * Fold b0..b3 into a0..a3 and store the four results through _dst.
 * The arg is incremented.
 */
#define XOR_AND_STORE(_dst) \
	do { \
		a0 ^= b0; (_dst)[0] = a0; \
		a1 ^= b1; (_dst)[1] = a1; \
		a2 ^= b2; (_dst)[2] = a2; \
		a3 ^= b3; (_dst)[3] = a3; \
		(_dst) += 4; \
	} while (0)
|
||||
|
||||
|
||||
void rf_nWayXor3(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[3]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_nWayXor4(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[4]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_nWayXor5(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[5]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_LOAD_NEXT(f);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_nWayXor6(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
|
||||
register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[6]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_LOAD_NEXT(f);
|
||||
XOR_AND_LOAD_NEXT(g);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_nWayXor7(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
|
||||
register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
|
||||
register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[7]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_LOAD_NEXT(f);
|
||||
XOR_AND_LOAD_NEXT(g);
|
||||
XOR_AND_LOAD_NEXT(h);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
void rf_nWayXor8(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
|
||||
register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
|
||||
register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
|
||||
register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[8]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_LOAD_NEXT(f);
|
||||
XOR_AND_LOAD_NEXT(g);
|
||||
XOR_AND_LOAD_NEXT(h);
|
||||
XOR_AND_LOAD_NEXT(i);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void rf_nWayXor9(src_rbs, dest_rb, len)
|
||||
RF_ReconBuffer_t **src_rbs;
|
||||
RF_ReconBuffer_t *dest_rb;
|
||||
int len;
|
||||
{
|
||||
register unsigned long *dst = (unsigned long *) dest_rb->buffer;
|
||||
register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
|
||||
register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
|
||||
register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
|
||||
register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
|
||||
register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
|
||||
register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
|
||||
register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
|
||||
register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
|
||||
register unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
|
||||
unsigned long a0,a1,a2,a3, b0,b1,b2,b3;
|
||||
|
||||
callcount[9]++;
|
||||
/* align dest to cache line */
|
||||
while ((((unsigned long) dst) & 0x1f)) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
|
||||
len--;
|
||||
}
|
||||
while (len > 4 ) {
|
||||
LOAD_FIRST(dst,b);
|
||||
XOR_AND_LOAD_NEXT(c);
|
||||
XOR_AND_LOAD_NEXT(d);
|
||||
XOR_AND_LOAD_NEXT(e);
|
||||
XOR_AND_LOAD_NEXT(f);
|
||||
XOR_AND_LOAD_NEXT(g);
|
||||
XOR_AND_LOAD_NEXT(h);
|
||||
XOR_AND_LOAD_NEXT(i);
|
||||
XOR_AND_LOAD_NEXT(j);
|
||||
XOR_AND_STORE(dst);
|
||||
}
|
||||
while (len) {
|
||||
*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
|
||||
len--;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
/* $NetBSD: rf_nwayxor.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* rf_nwayxor.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/*
|
||||
* rf_nwayxor.h -- types and prototypes for nwayxor module
|
||||
*/
|
||||
/*
|
||||
* :
|
||||
* Log: rf_nwayxor.h,v
|
||||
* Revision 1.4 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.3 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.2 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.1 1996/05/18 19:56:47 jimz
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_NWAYXOR_H_
|
||||
#define _RF__RF_NWAYXOR_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_reconstruct.h"
|
||||
|
||||
/*
 * NOTE(review): rf_ConfigureNWayXor is defined outside this header; judging
 * by the RF_ShutdownList_t argument it presumably sets the module up and
 * registers its cleanup on *listp -- confirm against the definition.
 */
int rf_ConfigureNWayXor(RF_ShutdownList_t **listp);
|
||||
/*
 * rf_nWayXorN: XOR N source buffers into a destination buffer.
 * rf_nWayXor9, for example, reads src_rbs[0..8] and folds them into
 * dest_rb->buffer one unsigned long at a time, with len counting those
 * words.  The lower-numbered variants presumably follow the same pattern
 * with fewer sources -- confirm for any variant you rely on.
 */
void rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
void rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
|
||||
|
||||
#endif /* !_RF__RF_NWAYXOR_H_ */
|
|
@ -0,0 +1,84 @@
|
|||
/* $NetBSD: rf_options.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* rf_options.c
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <dfstrace.h>
|
||||
#endif /* !__NetBSD__ */
|
||||
#endif /* KERNEL */
|
||||
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_types.h"
|
||||
#include "rf_archs.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_options.h"
|
||||
|
||||
/*
 * Expansion pass 1: define the option variables.
 *
 * rf_options.h (included above) leaves RF_DBG_OPTION defined as an extern
 * declaration, so drop that definition first.  The macro is then redefined
 * so that every RF_DBG_OPTION(name, default) line in rf_optnames.h becomes
 * the definition "long rf_<name> = <default>;".
 */
#ifdef RF_DBG_OPTION
|
||||
#undef RF_DBG_OPTION
|
||||
#endif /* RF_DBG_OPTION */
|
||||
|
||||
#ifdef __STDC__
|
||||
#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_;
|
||||
#else /* __STDC__ */
|
||||
/* pre-ANSI preprocessors: the empty comment is the token-pasting idiom */
#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_;
|
||||
#endif /* __STDC__ */
|
||||
|
||||
#include "rf_optnames.h"
|
||||
|
||||
/*
 * Expansion pass 2: build the name table.
 *
 * RF_DBG_OPTION is redefined so that each option line in rf_optnames.h
 * becomes one initializer "{ <name as string>, &rf_<name> },".  The table
 * is closed with a {NULL, NULL} entry that marks its end.
 */
#undef RF_DBG_OPTION
|
||||
|
||||
#ifdef __STDC__
|
||||
#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ },
|
||||
#else /* __STDC__ */
|
||||
#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ },
|
||||
#endif /* __STDC__ */
|
||||
|
||||
RF_DebugName_t rf_debugNames[] = {
|
||||
#include "rf_optnames.h"
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
/*
 * Expansion pass 3: the reset routine.
 *
 * RF_DBG_OPTION is redefined so that each option line in rf_optnames.h
 * becomes the assignment "rf_<name> = <default>;".  Including the list
 * inside the function body therefore makes rf_ResetDebugOptions() put
 * every option variable back to its compiled-in default.
 */
#undef RF_DBG_OPTION
|
||||
|
||||
#ifdef __STDC__
|
||||
#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ;
|
||||
#else /* __STDC__ */
|
||||
#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ;
|
||||
#endif /* __STDC__ */
|
||||
|
||||
/* Restore all rf_<option> variables to their defaults; takes no arguments. */
void rf_ResetDebugOptions()
|
||||
{
|
||||
#include "rf_optnames.h"
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/* $NetBSD: rf_options.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* rf_options.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_OPTIONS_H_
|
||||
#define _RF__RF_OPTIONS_H_
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL
|
||||
#ifndef __NetBSD__
|
||||
#include <dfstrace.h>
|
||||
#endif /* !__NetBSD__ */
|
||||
#endif /* KERNEL */
|
||||
|
||||
#define RF_DEFAULT_LOCK_TABLE_SIZE 256
|
||||
|
||||
/*
 * One entry of the rf_debugNames table: pairs an option's name with the
 * address of the long variable that holds its value.  rf_options.c fills
 * the table from rf_optnames.h and ends it with a {NULL, NULL} entry.
 */
typedef struct RF_DebugNames_s {
|
||||
char *name; /* option name as a string (without the rf_ prefix) */
|
||||
long *ptr; /* address of the matching rf_<name> variable */
|
||||
} RF_DebugName_t;
|
||||
|
||||
extern RF_DebugName_t rf_debugNames[];
|
||||
|
||||
/*
 * Declare every option variable: with this definition each
 * RF_DBG_OPTION(name, default) line in rf_optnames.h expands to
 * "extern long rf_<name>;".  The macro is left defined afterwards, so a
 * file that wants a different expansion must #undef it first.
 */
#ifdef RF_DBG_OPTION
|
||||
#undef RF_DBG_OPTION
|
||||
#endif /* RF_DBG_OPTION */
|
||||
|
||||
#ifdef __STDC__
|
||||
#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_;
|
||||
#else /* __STDC__ */
|
||||
#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_;
|
||||
#endif /* __STDC__ */
|
||||
#include "rf_optnames.h"
|
||||
|
||||
void rf_ResetDebugOptions(void);
|
||||
|
||||
#endif /* !_RF__RF_OPTIONS_H_ */
|
|
@ -0,0 +1,143 @@
|
|||
/* $NetBSD: rf_optnames.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* rf_optnames.h
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1996 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Jim Zelenka
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Don't protect against multiple inclusion here- we actually want this.
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#define KERNEL
|
||||
#endif
|
||||
|
||||
/*
 * Options present in every build configuration.
 *
 * Each line is RF_DBG_OPTION(name, default).  What a line expands to is
 * decided by whichever file includes this list: the includer defines
 * RF_DBG_OPTION before the #include (declaration, definition, table entry
 * or reset assignment of the long variable rf_<name>).  Entries must not
 * end in a semicolon or comma; the macro supplies the separator.
 */
RF_DBG_OPTION(accSizeKB,0) /* if nonzero, the fixed access size to run */
|
||||
RF_DBG_OPTION(accessDebug,0)
|
||||
RF_DBG_OPTION(accessTraceBufSize,0)
|
||||
RF_DBG_OPTION(alignAccesses,0) /* whether accs should be aligned to their size */
|
||||
RF_DBG_OPTION(camlayerIOs,0)
|
||||
RF_DBG_OPTION(camlayerDebug,0) /* debug CAM activity */
|
||||
RF_DBG_OPTION(cscanDebug,0) /* debug CSCAN sorting */
|
||||
RF_DBG_OPTION(dagDebug,0)
|
||||
RF_DBG_OPTION(debugPrintUseBuffer,0)
|
||||
RF_DBG_OPTION(degDagDebug,0)
|
||||
RF_DBG_OPTION(disableAsyncAccs,0)
|
||||
RF_DBG_OPTION(diskDebug,0)
|
||||
RF_DBG_OPTION(doDebug,0)
|
||||
RF_DBG_OPTION(dtDebug,0)
|
||||
RF_DBG_OPTION(enableAtomicRMW,0) /* this debug var enables locking of the disk
|
||||
* arm during small-write operations. Setting
|
||||
* this variable to anything other than 0 will
|
||||
* result in deadlock. (wvcii)
|
||||
*/
|
||||
RF_DBG_OPTION(engineDebug,0)
|
||||
RF_DBG_OPTION(fifoDebug,0) /* debug fifo queueing */
|
||||
RF_DBG_OPTION(floatingRbufDebug,0)
|
||||
RF_DBG_OPTION(forceHeadSepLimit,-1)
|
||||
RF_DBG_OPTION(forceNumFloatingReconBufs,-1) /* wire down number of extra recon buffers to use */
|
||||
RF_DBG_OPTION(keepAccTotals,0) /* turn on keep_acc_totals */
|
||||
RF_DBG_OPTION(lockTableSize,RF_DEFAULT_LOCK_TABLE_SIZE)
|
||||
RF_DBG_OPTION(mapDebug,0)
|
||||
RF_DBG_OPTION(maxNumTraces,-1)
|
||||
RF_DBG_OPTION(maxRandomSizeKB,128) /* if rf_accSizeKB==0, acc sizes are uniform in [ (1/2)..maxRandomSizeKB ] */
|
||||
RF_DBG_OPTION(maxTraceRunTimeSec,0)
|
||||
RF_DBG_OPTION(memAmtDebug,0) /* trace amount of memory allocated */
|
||||
RF_DBG_OPTION(memChunkDebug,0)
|
||||
RF_DBG_OPTION(memDebug,0)
|
||||
RF_DBG_OPTION(memDebugAddress,0)
|
||||
RF_DBG_OPTION(numBufsToAccumulate,1) /* number of buffers to accumulate before doing XOR */
|
||||
RF_DBG_OPTION(prReconSched,0)
|
||||
RF_DBG_OPTION(printDAGsDebug,0)
|
||||
RF_DBG_OPTION(printStatesDebug,0)
|
||||
RF_DBG_OPTION(protectedSectors,64L) /* # of sectors at start of disk to
|
||||
exclude from RAID address space */
|
||||
RF_DBG_OPTION(pssDebug,0)
|
||||
RF_DBG_OPTION(queueDebug,0)
|
||||
RF_DBG_OPTION(quiesceDebug,0)
|
||||
RF_DBG_OPTION(raidSectorOffset,0) /* added to all incoming sectors to
|
||||
debug alignment problems */
|
||||
RF_DBG_OPTION(reconDebug,0)
|
||||
RF_DBG_OPTION(reconbufferDebug,0)
|
||||
RF_DBG_OPTION(rewriteParityStripes,0) /* debug flag that causes parity rewrite at startup */
|
||||
RF_DBG_OPTION(scanDebug,0) /* debug SCAN sorting */
|
||||
RF_DBG_OPTION(showXorCallCounts,0) /* show n-way Xor call counts */
|
||||
RF_DBG_OPTION(shutdownDebug,0) /* show shutdown calls */
|
||||
RF_DBG_OPTION(sizePercentage,100)
|
||||
RF_DBG_OPTION(sstfDebug,0) /* turn on debugging info for sstf queueing */
|
||||
RF_DBG_OPTION(stripeLockDebug,0)
|
||||
RF_DBG_OPTION(suppressLocksAndLargeWrites,0)
|
||||
RF_DBG_OPTION(suppressTraceDelays,0)
|
||||
RF_DBG_OPTION(testDebug,0)
|
||||
RF_DBG_OPTION(useMemChunks,1)
|
||||
RF_DBG_OPTION(validateDAGDebug,0)
|
||||
RF_DBG_OPTION(validateVisitedDebug,1) /* XXX turn to zero by default? */
|
||||
RF_DBG_OPTION(verifyParityDebug,0)
|
||||
RF_DBG_OPTION(warnLongIOs,0)
|
||||
|
||||
/* Option that exists only in kernel builds. */
#ifdef KERNEL
|
||||
RF_DBG_OPTION(debugKernelAccess,0) /* DoAccessKernel debugging */
|
||||
#endif /* KERNEL */
|
||||
|
||||
/* Options that exist only in non-kernel (user-level) builds. */
#ifndef KERNEL
|
||||
RF_DBG_OPTION(disableParityVerify,0) /* suppress verification of parity */
|
||||
RF_DBG_OPTION(interactiveScript,0) /* set as a debug option for now */
|
||||
RF_DBG_OPTION(looptestShowWrites,0) /* user-level loop test write debugging */
|
||||
RF_DBG_OPTION(traceDebug,0)
|
||||
#endif /* !KERNEL */
|
||||
|
||||
/*
 * Options that exist only when SIMULATE is defined.
 * NOTE(review): the mW* defaults look like power figures in milliwatts for
 * the simulated disk states -- confirm against the simulator code.
 */
#ifdef SIMULATE
|
||||
RF_DBG_OPTION(addrSizePercentage,100)
|
||||
RF_DBG_OPTION(diskTrace,0) /* used to turn the timing traces on and off */
|
||||
RF_DBG_OPTION(eventDebug,0)
|
||||
RF_DBG_OPTION(mWactive,1500)
|
||||
RF_DBG_OPTION(mWidle,625)
|
||||
RF_DBG_OPTION(mWsleep,15)
|
||||
RF_DBG_OPTION(mWspinup,3500)
|
||||
#endif /* SIMULATE */
|
||||
|
||||
/* Options compiled in only when parity logging is included. */
#if RF_INCLUDE_PARITYLOGGING > 0
|
||||
RF_DBG_OPTION(forceParityLogReint,0)
|
||||
RF_DBG_OPTION(numParityRegions,0) /* number of regions in the array */
|
||||
RF_DBG_OPTION(numReintegrationThreads,1)
|
||||
RF_DBG_OPTION(parityLogDebug,0) /* if nonzero, enables debugging of parity logging */
|
||||
RF_DBG_OPTION(totalInCoreLogCapacity,1024*1024) /* target bytes available for in-core logs */
|
||||
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
|
||||
|
||||
/* Option compiled in only when DFSTRACE support is enabled. */
#if DFSTRACE > 0
|
||||
RF_DBG_OPTION(DFSTraceAccesses,0)
|
||||
#endif /* DFSTRACE > 0 */
|
||||
|
||||
/* Options compiled in only for demo builds. */
#if RF_DEMO > 0
|
||||
RF_DBG_OPTION(demoMeterHpos,0) /* horizontal position of meters for demo mode */
|
||||
RF_DBG_OPTION(demoMeterTag,0)
|
||||
RF_DBG_OPTION(demoMeterVpos,0) /* vertical position of meters for demo mode */
|
||||
RF_DBG_OPTION(demoMode,0)
|
||||
RF_DBG_OPTION(demoSMM,0)
|
||||
RF_DBG_OPTION(demoSuppressReconInitVerify,0) /* suppress initialization & verify for recon */
|
||||
#endif /* RF_DEMO > 0 */
|
|
@ -0,0 +1,74 @@
|
|||
/* $NetBSD: rf_owner.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Mark Holland
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* :
|
||||
* Log: rf_owner.h,v
|
||||
* Revision 1.8 1996/08/20 14:36:51 jimz
|
||||
* add bufLen to RF_EventCreate_t to be able to include buffer length
|
||||
* when freeing buffer
|
||||
*
|
||||
* Revision 1.7 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.6 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.5 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.4 1995/12/01 19:44:30 root
|
||||
* added copyright info
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_OWNER_H_
|
||||
#define _RF__RF_OWNER_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/*
 * Per-owner bookkeeping record referenced from RF_EventCreate_s.
 * NOTE(review): nothing in this header documents the fields; the notes
 * below are read off the names only and need confirming against the users
 * of this struct.
 */
struct RF_OwnerInfo_s {
|
||||
RF_RaidAccessDesc_t *desc; /* access descriptor associated with this owner */
|
||||
int owner; /* presumably an owner identifier -- confirm */
|
||||
double last_start; /* presumably a start timestamp; units not shown here */
|
||||
int done; /* used as a flag -- meaning to be confirmed */
|
||||
int notFirst; /* used as a flag -- meaning to be confirmed */
|
||||
};
|
||||
|
||||
/*
 * Bundle of pointers handed along when an event is created: the array,
 * the script, the owner record and an associated buffer.
 * NOTE(review): who allocates and frees bufPtr is not visible in this
 * header -- confirm against the code that consumes this struct.
 */
struct RF_EventCreate_s {
|
||||
RF_Raid_t *raidPtr;
|
||||
RF_Script_t *script;
|
||||
RF_OwnerInfo_t *ownerInfo;
|
||||
char *bufPtr;
|
||||
int bufLen; /* length of bufPtr; added (rev 1.8) so it is known when freeing the buffer */
|
||||
};
|
||||
|
||||
#endif /* !_RF__RF_OWNER_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,224 @@
|
|||
/* $NetBSD: rf_paritylog.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* header file for parity log
|
||||
*
|
||||
* :
|
||||
* Log: rf_paritylog.h,v
|
||||
* Revision 1.21 1996/07/17 21:00:58 jimz
|
||||
* clean up timer interface, tracing
|
||||
*
|
||||
* Revision 1.20 1996/07/15 17:22:18 jimz
|
||||
* nit-pick code cleanup
|
||||
* resolve stdlib problems on DEC OSF
|
||||
*
|
||||
* Revision 1.19 1996/06/11 10:17:57 jimz
|
||||
* definitions and run state for parity logging thread
|
||||
*
|
||||
* Revision 1.18 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.17 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.16 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.15 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.14 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.13 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.12 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.11 1995/12/06 20:54:58 wvcii
|
||||
* added prototyping
|
||||
*
|
||||
* Revision 1.10 1995/11/30 16:05:50 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.9 1995/10/07 05:09:27 wvcii
|
||||
* removed #define BYTESPERSECTOR 512
|
||||
*
|
||||
* Revision 1.8 1995/09/06 19:27:52 wvcii
|
||||
* added startTime to commonLogData
|
||||
*
|
||||
* Revision 1.7 1995/07/07 00:13:42 wvcii
|
||||
* this version free from deadlock, fails parity verification
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_PARITYLOG_H_
|
||||
#define _RF__RF_PARITYLOG_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64
|
||||
|
||||
typedef int RF_RegionId_t;
|
||||
|
||||
/*
 * Kind of a parity-log record; stored in the 'operation' field of
 * RF_CommonLogData_s, RF_ParityLogRecord_s and RF_DiskMap_s.
 * NOTE(review): the exact meaning of each value is defined by the parity
 * logging code, not by this header -- confirm there before relying on it.
 */
typedef enum RF_ParityRecordType_e {
|
||||
RF_STOP,
|
||||
RF_UPDATE,
|
||||
RF_OVERWRITE
|
||||
} RF_ParityRecordType_t;
|
||||
|
||||
/*
 * State shared by one or more RF_ParityLogData_s items (each of which
 * points here through its 'common' field).  Most fields have the same
 * names as the arguments of rf_CreateParityLogData() and are presumably
 * filled from them -- confirm in the implementation.
 */
struct RF_CommonLogData_s {
|
||||
RF_DECLARE_MUTEX(mutex) /* protects cnt */
|
||||
int cnt; /* when 0, time to call wakeFunc */
|
||||
RF_Raid_t *raidPtr;
|
||||
/* int (*wakeFunc)(struct buf *); */
|
||||
int (*wakeFunc)(RF_DagNode_t *node, int status); /* completion callback, see cnt */
|
||||
void *wakeArg;
|
||||
RF_AccTraceEntry_t *tracerec;
|
||||
RF_Etimer_t startTime;
|
||||
caddr_t bufPtr; /* base address that RF_ParityLogData_s.bufOffset is relative to */
|
||||
RF_ParityRecordType_t operation;
|
||||
RF_CommonLogData_t *next; /* list link (presumably for freeCommonList -- confirm) */
|
||||
};
|
||||
|
||||
/*
 * One piece of parity-log work, confined to a single region.  Items are
 * doubly linked (next/prev) so they can sit on the head/tail queues kept
 * in RF_ParityLogDiskQueue_s.
 */
struct RF_ParityLogData_s {
|
||||
RF_RegionId_t regionID; /* this struct guaranteed to span a single region */
|
||||
int bufOffset; /* offset from common->bufPtr */
|
||||
RF_PhysDiskAddr_t diskAddress;
|
||||
RF_CommonLogData_t *common; /* info shared by one or more parityLogData structs */
|
||||
RF_ParityLogData_t *next;
|
||||
RF_ParityLogData_t *prev;
|
||||
};
|
||||
|
||||
/*
 * Append queue: as declared here it holds only a mutex, with no queue
 * storage of its own.
 */
struct RF_ParityLogAppendQueue_s {
|
||||
RF_DECLARE_MUTEX(mutex)
|
||||
};
|
||||
|
||||
/*
 * One record of an in-core parity log (element type of
 * RF_ParityLog_s.records): a physical parity address plus the record type.
 * RF_DiskMap_s has the same two fields.
 */
struct RF_ParityLogRecord_s {
|
||||
RF_PhysDiskAddr_t parityAddr;
|
||||
RF_ParityRecordType_t operation;
|
||||
};
|
||||
|
||||
/*
 * A parity log belonging to one region.  Logs are singly linked through
 * 'next' so they can be kept on lists such as RF_ParityLogQueue_s.parityLogs
 * and the flush/reintegration queues of RF_ParityLogDiskQueue_s.
 * NOTE(review): units of diskOffset and the relation between records[] and
 * bufPtr are not stated in this header -- confirm in rf_paritylog.c.
 */
struct RF_ParityLog_s {
|
||||
RF_RegionId_t regionID;
|
||||
int numRecords; /* presumably the number of entries used in records[] -- confirm */
|
||||
int diskOffset;
|
||||
RF_ParityLogRecord_t *records;
|
||||
caddr_t bufPtr;
|
||||
RF_ParityLog_t *next;
|
||||
};
|
||||
|
||||
/*
 * Mutex plus the head of a list of parity logs (linked through
 * RF_ParityLog_s.next).  Presumably the mutex guards that list -- confirm.
 */
struct RF_ParityLogQueue_s {
|
||||
RF_DECLARE_MUTEX(mutex)
|
||||
RF_ParityLog_t *parityLogs;
|
||||
};
|
||||
|
||||
/*
 * Pool of region buffers kept as a ring in 'buffers'.
 *
 * Taking a buffer (see AcquireReintBuffer) reads buffers[availBuffersIndex],
 * decrements availableBuffers and advances availBuffersIndex, wrapping to 0
 * at totalBuffers.  Returning one (see ReleaseReintBuffer) stores it at
 * buffers[emptyBuffersIndex], increments availableBuffers and advances
 * emptyBuffersIndex the same way.  Both operations run with 'mutex' held.
 */
struct RF_RegionBufferQueue_s {
|
||||
RF_DECLARE_MUTEX(mutex)
|
||||
RF_DECLARE_COND(cond) /* waited on when no buffer is available */
|
||||
int bufferSize; /* size of each buffer; units not stated here */
|
||||
int totalBuffers; /* size of array 'buffers' */
|
||||
int availableBuffers; /* num available 'buffers' */
|
||||
int emptyBuffersIndex; /* stick next freed buffer here */
|
||||
int availBuffersIndex; /* grab next buffer from here */
|
||||
caddr_t *buffers; /* array buffers used to hold parity */
|
||||
};
|
||||
|
||||
/*
 * Bit flags for RF_ParityLogDiskQueue_s.threadState.  They are distinct
 * bits, so several can be set at the same time.
 */
#define RF_PLOG_CREATED (1<<0) /* thread is created */
|
||||
#define RF_PLOG_RUNNING (1<<1) /* thread is running */
|
||||
#define RF_PLOG_TERMINATE (1<<2) /* thread has been told to terminate (should exit) */
|
||||
#define RF_PLOG_SHUTDOWN (1<<3) /* thread is aware and exiting/exited */
|
||||
|
||||
/*
 * Work queues and free lists shared with the parity-log disk thread.
 * Everything here is protected by 'mutex'; threadState is a combination of
 * the RF_PLOG_* flags.
 */
struct RF_ParityLogDiskQueue_s {
|
||||
RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */
|
||||
RF_DECLARE_COND(cond)
|
||||
int threadState; /* is thread running, should it shutdown (see RF_PLOG_* flags) */
|
||||
RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed to log disk */
|
||||
RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be reintegrated */
|
||||
RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, waiting on a buffer */
|
||||
RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, waiting on a buffer */
|
||||
RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, waiting on reintegration */
|
||||
RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, waiting on reintegration */
|
||||
RF_ParityLogData_t *logBlockHead; /* queue of work, blocked until a log is available */
|
||||
RF_ParityLogData_t *logBlockTail; /* tail of the logBlock queue */
|
||||
RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked until reintegration is complete */
|
||||
RF_ParityLogData_t *reintBlockTail; /* tail of the reintBlock queue */
|
||||
RF_CommonLogData_t *freeCommonList; /* list of unused common data structs */
|
||||
RF_ParityLogData_t *freeDataList; /* list of unused log data structs */
|
||||
};
|
||||
|
||||
/*
 * One entry of a region's in-core disk map (RF_RegionInfo_s.diskMap): a
 * physical parity address plus the record type.  Same fields as
 * RF_ParityLogRecord_s.
 */
struct RF_DiskMap_s {
|
||||
RF_PhysDiskAddr_t parityAddr;
|
||||
RF_ParityRecordType_t operation;
|
||||
};
|
||||
|
||||
/*
 * Per-region parity-logging state.  Two locks are used: 'mutex' covers the
 * logging state (diskCount, diskMap, loggingEnabled, coreLog) and
 * 'reintMutex' covers only reintInProgress.
 */
struct RF_RegionInfo_s {
|
||||
RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, loggingEnabled, coreLog */
|
||||
RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */
|
||||
int reintInProgress; /* flag used to suspend flushing operations */
|
||||
RF_SectorCount_t capacity; /* capacity of this region in sectors */
|
||||
RF_SectorNum_t regionStartAddr; /* starting disk address for this region */
|
||||
RF_SectorNum_t parityStartAddr; /* NOTE(review): original comment duplicated regionStartAddr's; presumably the starting disk address of the parity this region protects -- confirm */
|
||||
RF_SectorCount_t numSectorsParity; /* number of parity sectors protected by this region */
|
||||
RF_SectorCount_t diskCount; /* num of sectors written to this region's disk log */
|
||||
RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk log */
|
||||
int loggingEnabled; /* logging enable for this region */
|
||||
RF_ParityLog_t *coreLog; /* in-core log for this region */
|
||||
};
|
||||
|
||||
RF_ParityLogData_t *rf_CreateParityLogData(RF_ParityRecordType_t operation,
|
||||
RF_PhysDiskAddr_t *pda, caddr_t bufPtr, RF_Raid_t *raidPtr,
|
||||
int (*wakeFunc)(RF_DagNode_t *node, int status),
|
||||
void *wakeArg, RF_AccTraceEntry_t *tracerec,
|
||||
RF_Etimer_t startTime);
|
||||
RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr,
|
||||
RF_RegionId_t regionID, RF_ParityLogData_t **head,
|
||||
RF_ParityLogData_t **tail, int ignoreLocks);
|
||||
void rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog);
|
||||
int rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish,
|
||||
RF_ParityLog_t **incomingLog, int clearReintFlag);
|
||||
void rf_EnableParityLogging(RF_Raid_t *raidPtr);
|
||||
|
||||
#endif /* !_RF__RF_PARITYLOG_H_ */
|
|
@ -0,0 +1,789 @@
|
|||
/* $NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
/* Code for flushing and reintegration operations related to parity logging.
|
||||
*
|
||||
* :
|
||||
* Log: rf_paritylogDiskMgr.c,v
|
||||
* Revision 1.25 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.24 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.23 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.22 1996/06/11 10:17:33 jimz
|
||||
* Put in thread startup/shutdown mechanism for proper synchronization
|
||||
* with start and end of day routines.
|
||||
*
|
||||
* Revision 1.21 1996/06/09 02:36:46 jimz
|
||||
* lots of little crufty cleanup- fixup whitespace
|
||||
* issues, comment #ifdefs, improve typing in some
|
||||
* places (esp size-related)
|
||||
*
|
||||
* Revision 1.20 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.19 1996/06/05 18:06:02 jimz
|
||||
* Major code cleanup. The Great Renaming is now done.
|
||||
* Better modularity. Better typing. Fixed a bunch of
|
||||
* synchronization bugs. Made a lot of global stuff
|
||||
* per-desc or per-array. Removed dead code.
|
||||
*
|
||||
* Revision 1.18 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.17 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.16 1996/05/30 23:22:16 jimz
|
||||
* bugfixes of serialization, timing problems
|
||||
* more cleanup
|
||||
*
|
||||
* Revision 1.15 1996/05/30 12:59:18 jimz
|
||||
* make etimer happier, more portable
|
||||
*
|
||||
* Revision 1.14 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.13 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.12 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.11 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.10 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.9 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.8 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.7 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.6 1995/12/06 20:58:27 wvcii
|
||||
* added prototypes
|
||||
*
|
||||
* Revision 1.5 1995/11/30 16:06:05 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.4 1995/10/09 22:41:10 wvcii
|
||||
* minor bug fix
|
||||
*
|
||||
* Revision 1.3 1995/10/08 20:43:47 wvcii
|
||||
* lots of random debugging - debugging still incomplete
|
||||
*
|
||||
* Revision 1.2 1995/09/07 15:52:19 jimz
|
||||
* noop compile when INCLUDE_PARITYLOGGING not defined
|
||||
*
|
||||
* Revision 1.1 1995/09/06 19:24:44 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
|
||||
#if RF_INCLUDE_PARITYLOGGING > 0
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_threadstuff.h"
|
||||
#include "rf_mcpair.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_desc.h"
|
||||
#include "rf_layout.h"
|
||||
#include "rf_diskqueue.h"
|
||||
#include "rf_paritylog.h"
|
||||
#include "rf_general.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_etimer.h"
|
||||
#include "rf_paritylogging.h"
|
||||
#include "rf_engine.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_map.h"
|
||||
#include "rf_parityscan.h"
|
||||
#include "rf_sys.h"
|
||||
|
||||
#include "rf_paritylogDiskMgr.h"
|
||||
|
||||
static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
|
||||
|
||||
static caddr_t AcquireReintBuffer(pool)
|
||||
RF_RegionBufferQueue_t *pool;
|
||||
{
|
||||
caddr_t bufPtr = NULL;
|
||||
|
||||
/* Return a region buffer from the free list (pool).
|
||||
If the free list is empty, WAIT.
|
||||
BLOCKING */
|
||||
|
||||
RF_LOCK_MUTEX(pool->mutex);
|
||||
if (pool->availableBuffers > 0) {
|
||||
bufPtr = pool->buffers[pool->availBuffersIndex];
|
||||
pool->availableBuffers--;
|
||||
pool->availBuffersIndex++;
|
||||
if (pool->availBuffersIndex == pool->totalBuffers)
|
||||
pool->availBuffersIndex = 0;
|
||||
RF_UNLOCK_MUTEX(pool->mutex);
|
||||
}
|
||||
else {
|
||||
RF_PANIC(); /* should never happen in currect config, single reint */
|
||||
RF_WAIT_COND(pool->cond, pool->mutex);
|
||||
}
|
||||
return(bufPtr);
|
||||
}
|
||||
|
||||
static void ReleaseReintBuffer(
|
||||
RF_RegionBufferQueue_t *pool,
|
||||
caddr_t bufPtr)
|
||||
{
|
||||
/* Insert a region buffer (bufPtr) into the free list (pool).
|
||||
NON-BLOCKING */
|
||||
|
||||
RF_LOCK_MUTEX(pool->mutex);
|
||||
pool->availableBuffers++;
|
||||
pool->buffers[pool->emptyBuffersIndex] = bufPtr;
|
||||
pool->emptyBuffersIndex++;
|
||||
if (pool->emptyBuffersIndex == pool->totalBuffers)
|
||||
pool->emptyBuffersIndex = 0;
|
||||
RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
|
||||
RF_UNLOCK_MUTEX(pool->mutex);
|
||||
RF_SIGNAL_COND(pool->cond);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void ReadRegionLog(
|
||||
RF_RegionId_t regionID,
|
||||
RF_MCPair_t *rrd_mcpair,
|
||||
caddr_t regionBuffer,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_DagHeader_t **rrd_dag_h,
|
||||
RF_AllocListElem_t **rrd_alloclist,
|
||||
RF_PhysDiskAddr_t **rrd_pda)
|
||||
{
|
||||
/* Initiate the read a region log from disk. Once initiated, return
|
||||
to the calling routine.
|
||||
|
||||
NON-BLOCKING
|
||||
*/
|
||||
|
||||
RF_AccTraceEntry_t tracerec;
|
||||
RF_DagNode_t *rrd_rdNode;
|
||||
|
||||
/* create DAG to read region log from disk */
|
||||
rf_MakeAllocList(*rrd_alloclist);
|
||||
*rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
|
||||
"Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
|
||||
|
||||
/* create and initialize PDA for the core log */
|
||||
/* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
|
||||
*rrd_pda = rf_AllocPDAList(1);
|
||||
rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
|
||||
(*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
|
||||
|
||||
if ((*rrd_pda)->next) {
|
||||
(*rrd_pda)->next = NULL;
|
||||
printf("set rrd_pda->next to NULL\n");
|
||||
}
|
||||
|
||||
/* initialize DAG parameters */
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
(*rrd_dag_h)->tracerec = &tracerec;
|
||||
rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
|
||||
rrd_rdNode->params[0].p = *rrd_pda;
|
||||
/* rrd_rdNode->params[1] = regionBuffer; */
|
||||
rrd_rdNode->params[2].v = 0;
|
||||
rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
|
||||
/* launch region log read dag */
|
||||
rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
|
||||
(void *) rrd_mcpair);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void WriteCoreLog(
|
||||
RF_ParityLog_t *log,
|
||||
RF_MCPair_t *fwr_mcpair,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_DagHeader_t **fwr_dag_h,
|
||||
RF_AllocListElem_t **fwr_alloclist,
|
||||
RF_PhysDiskAddr_t **fwr_pda)
|
||||
{
|
||||
RF_RegionId_t regionID = log->regionID;
|
||||
RF_AccTraceEntry_t tracerec;
|
||||
RF_SectorNum_t regionOffset;
|
||||
RF_DagNode_t *fwr_wrNode;
|
||||
|
||||
/* Initiate the write of a core log to a region log disk.
|
||||
Once initiated, return to the calling routine.
|
||||
|
||||
NON-BLOCKING
|
||||
*/
|
||||
|
||||
/* create DAG to write a core log to a region log disk */
|
||||
rf_MakeAllocList(*fwr_alloclist);
|
||||
*fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
"Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
|
||||
|
||||
/* create and initialize PDA for the region log */
|
||||
/* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
|
||||
*fwr_pda = rf_AllocPDAList(1);
|
||||
regionOffset = log->diskOffset;
|
||||
rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
|
||||
(*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
|
||||
|
||||
/* initialize DAG parameters */
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
(*fwr_dag_h)->tracerec = &tracerec;
|
||||
fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
|
||||
fwr_wrNode->params[0].p = *fwr_pda;
|
||||
/* fwr_wrNode->params[1] = log->bufPtr; */
|
||||
fwr_wrNode->params[2].v = 0;
|
||||
fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
|
||||
/* launch the dag to write the core log to disk */
|
||||
rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
|
||||
(void *) fwr_mcpair);
|
||||
}
|
||||
|
||||
|
||||
static void ReadRegionParity(
|
||||
RF_RegionId_t regionID,
|
||||
RF_MCPair_t *prd_mcpair,
|
||||
caddr_t parityBuffer,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_DagHeader_t **prd_dag_h,
|
||||
RF_AllocListElem_t **prd_alloclist,
|
||||
RF_PhysDiskAddr_t **prd_pda)
|
||||
{
|
||||
/* Initiate the read region parity from disk.
|
||||
Once initiated, return to the calling routine.
|
||||
|
||||
NON-BLOCKING
|
||||
*/
|
||||
|
||||
RF_AccTraceEntry_t tracerec;
|
||||
RF_DagNode_t *prd_rdNode;
|
||||
|
||||
/* create DAG to read region parity from disk */
|
||||
rf_MakeAllocList(*prd_alloclist);
|
||||
*prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
|
||||
"Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
|
||||
|
||||
/* create and initialize PDA for region parity */
|
||||
/* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
|
||||
*prd_pda = rf_AllocPDAList(1);
|
||||
rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
|
||||
if (rf_parityLogDebug)
|
||||
printf("[reading %d sectors of parity from region %d]\n",
|
||||
(int)(*prd_pda)->numSector, regionID);
|
||||
if ((*prd_pda)->next) {
|
||||
(*prd_pda)->next = NULL;
|
||||
printf("set prd_pda->next to NULL\n");
|
||||
}
|
||||
|
||||
/* initialize DAG parameters */
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
(*prd_dag_h)->tracerec = &tracerec;
|
||||
prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
|
||||
prd_rdNode->params[0].p = *prd_pda;
|
||||
prd_rdNode->params[1].p = parityBuffer;
|
||||
prd_rdNode->params[2].v = 0;
|
||||
prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
if (rf_validateDAGDebug)
|
||||
rf_ValidateDAG(*prd_dag_h);
|
||||
/* launch region parity read dag */
|
||||
rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
|
||||
(void *) prd_mcpair);
|
||||
}
|
||||
|
||||
static void WriteRegionParity(
|
||||
RF_RegionId_t regionID,
|
||||
RF_MCPair_t *pwr_mcpair,
|
||||
caddr_t parityBuffer,
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_DagHeader_t **pwr_dag_h,
|
||||
RF_AllocListElem_t **pwr_alloclist,
|
||||
RF_PhysDiskAddr_t **pwr_pda)
|
||||
{
|
||||
/* Initiate the write of region parity to disk.
|
||||
Once initiated, return to the calling routine.
|
||||
|
||||
NON-BLOCKING
|
||||
*/
|
||||
|
||||
RF_AccTraceEntry_t tracerec;
|
||||
RF_DagNode_t *pwr_wrNode;
|
||||
|
||||
/* create DAG to write region log from disk */
|
||||
rf_MakeAllocList(*pwr_alloclist);
|
||||
*pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
|
||||
"Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
|
||||
|
||||
/* create and initialize PDA for region parity */
|
||||
/* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *)); */
|
||||
*pwr_pda = rf_AllocPDAList(1);
|
||||
rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));
|
||||
|
||||
/* initialize DAG parameters */
|
||||
bzero((char *)&tracerec,sizeof(tracerec));
|
||||
(*pwr_dag_h)->tracerec = &tracerec;
|
||||
pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
|
||||
pwr_wrNode->params[0].p = *pwr_pda;
|
||||
/* pwr_wrNode->params[1] = parityBuffer; */
|
||||
pwr_wrNode->params[2].v = 0;
|
||||
pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
|
||||
|
||||
/* launch the dag to write region parity to disk */
|
||||
rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
|
||||
(void *) pwr_mcpair);
|
||||
}
|
||||
|
||||
static void FlushLogsToDisk(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_ParityLog_t *logList)
|
||||
{
|
||||
/* Flush a linked list of core logs to the log disk.
|
||||
Logs contain the disk location where they should be
|
||||
written. Logs were written in FIFO order and that
|
||||
order must be preserved.
|
||||
|
||||
Recommended optimizations:
|
||||
1) allow multiple flushes to occur simultaneously
|
||||
2) coalesce contiguous flush operations
|
||||
|
||||
BLOCKING
|
||||
*/
|
||||
|
||||
RF_ParityLog_t *log;
|
||||
RF_RegionId_t regionID;
|
||||
RF_MCPair_t *fwr_mcpair;
|
||||
RF_DagHeader_t *fwr_dag_h;
|
||||
RF_AllocListElem_t *fwr_alloclist;
|
||||
RF_PhysDiskAddr_t *fwr_pda;
|
||||
|
||||
fwr_mcpair = rf_AllocMCPair();
|
||||
RF_LOCK_MUTEX(fwr_mcpair->mutex);
|
||||
|
||||
RF_ASSERT(logList);
|
||||
log = logList;
|
||||
while (log)
|
||||
{
|
||||
regionID = log->regionID;
|
||||
|
||||
/* create and launch a DAG to write the core log */
|
||||
if (rf_parityLogDebug)
|
||||
printf("[initiating write of core log for region %d]\n", regionID);
|
||||
fwr_mcpair->flag = RF_FALSE;
|
||||
WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);
|
||||
|
||||
/* wait for the DAG to complete */
|
||||
#ifndef SIMULATE
|
||||
while (!fwr_mcpair->flag)
|
||||
RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
|
||||
#endif /* !SIMULATE */
|
||||
if (fwr_dag_h->status != rf_enable)
|
||||
{
|
||||
RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
|
||||
RF_ASSERT(0);
|
||||
}
|
||||
|
||||
/* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
|
||||
rf_FreePhysDiskAddr(fwr_pda);
|
||||
rf_FreeDAG(fwr_dag_h);
|
||||
rf_FreeAllocList(fwr_alloclist);
|
||||
|
||||
log = log->next;
|
||||
}
|
||||
RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
|
||||
rf_FreeMCPair(fwr_mcpair);
|
||||
rf_ReleaseParityLogs(raidPtr, logList);
|
||||
}
|
||||
|
||||
static void ReintegrateRegion(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_RegionId_t regionID,
|
||||
RF_ParityLog_t *coreLog)
|
||||
{
|
||||
RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
|
||||
RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
|
||||
RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
|
||||
RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
|
||||
caddr_t parityBuffer, regionBuffer=NULL;
|
||||
|
||||
/* Reintegrate a region (regionID).
|
||||
1. acquire region and parity buffers
|
||||
2. read log from disk
|
||||
3. read parity from disk
|
||||
4. apply log to parity
|
||||
5. apply core log to parity
|
||||
6. write new parity to disk
|
||||
|
||||
BLOCKING
|
||||
*/
|
||||
|
||||
if (rf_parityLogDebug)
|
||||
printf("[reintegrating region %d]\n", regionID);
|
||||
|
||||
/* initiate read of region parity */
|
||||
if (rf_parityLogDebug)
|
||||
printf("[initiating read of parity for region %d]\n", regionID);
|
||||
parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
|
||||
prd_mcpair = rf_AllocMCPair();
|
||||
RF_LOCK_MUTEX(prd_mcpair->mutex);
|
||||
prd_mcpair->flag = RF_FALSE;
|
||||
ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);
|
||||
|
||||
/* if region log nonempty, initiate read */
|
||||
if (raidPtr->regionInfo[regionID].diskCount > 0)
|
||||
{
|
||||
if (rf_parityLogDebug)
|
||||
printf("[initiating read of disk log for region %d]\n", regionID);
|
||||
regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
|
||||
rrd_mcpair = rf_AllocMCPair();
|
||||
RF_LOCK_MUTEX(rrd_mcpair->mutex);
|
||||
rrd_mcpair->flag = RF_FALSE;
|
||||
ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
|
||||
}
|
||||
|
||||
/* wait on read of region parity to complete */
|
||||
#ifndef SIMULATE
|
||||
while (!prd_mcpair->flag) {
|
||||
RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
|
||||
}
|
||||
#endif /* !SIMULATE */
|
||||
RF_UNLOCK_MUTEX(prd_mcpair->mutex);
|
||||
if (prd_dag_h->status != rf_enable)
|
||||
{
|
||||
RF_ERRORMSG("Unable to read parity from disk\n");
|
||||
/* add code to fail the parity disk */
|
||||
RF_ASSERT(0);
|
||||
}
|
||||
|
||||
/* apply core log to parity */
|
||||
/* if (coreLog)
|
||||
ApplyLogsToParity(coreLog, parityBuffer); */
|
||||
|
||||
if (raidPtr->regionInfo[regionID].diskCount > 0)
|
||||
{
|
||||
/* wait on read of region log to complete */
|
||||
#ifndef SIMULATE
|
||||
while (!rrd_mcpair->flag)
|
||||
RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
|
||||
#endif /* !SIMULATE */
|
||||
RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
|
||||
if (rrd_dag_h->status != rf_enable)
|
||||
{
|
||||
RF_ERRORMSG("Unable to read region log from disk\n");
|
||||
/* add code to fail the log disk */
|
||||
RF_ASSERT(0);
|
||||
}
|
||||
/* apply region log to parity */
|
||||
/* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
|
||||
/* release resources associated with region log */
|
||||
/* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
|
||||
rf_FreePhysDiskAddr(rrd_pda);
|
||||
rf_FreeDAG(rrd_dag_h);
|
||||
rf_FreeAllocList(rrd_alloclist);
|
||||
rf_FreeMCPair(rrd_mcpair);
|
||||
ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
|
||||
}
|
||||
|
||||
/* write reintegrated parity to disk */
|
||||
if (rf_parityLogDebug)
|
||||
printf("[initiating write of parity for region %d]\n", regionID);
|
||||
pwr_mcpair = rf_AllocMCPair();
|
||||
RF_LOCK_MUTEX(pwr_mcpair->mutex);
|
||||
pwr_mcpair->flag = RF_FALSE;
|
||||
WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
|
||||
#ifndef SIMULATE
|
||||
while (!pwr_mcpair->flag)
|
||||
RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
|
||||
#endif /* !SIMULATE */
|
||||
RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
|
||||
if (pwr_dag_h->status != rf_enable)
|
||||
{
|
||||
RF_ERRORMSG("Unable to write parity to disk\n");
|
||||
/* add code to fail the parity disk */
|
||||
RF_ASSERT(0);
|
||||
}
|
||||
|
||||
/* release resources associated with read of old parity */
|
||||
/* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
|
||||
rf_FreePhysDiskAddr(prd_pda);
|
||||
rf_FreeDAG(prd_dag_h);
|
||||
rf_FreeAllocList(prd_alloclist);
|
||||
rf_FreeMCPair(prd_mcpair);
|
||||
|
||||
/* release resources associated with write of new parity */
|
||||
ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
|
||||
/* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
|
||||
rf_FreePhysDiskAddr(pwr_pda);
|
||||
rf_FreeDAG(pwr_dag_h);
|
||||
rf_FreeAllocList(pwr_alloclist);
|
||||
rf_FreeMCPair(pwr_mcpair);
|
||||
|
||||
if (rf_parityLogDebug)
|
||||
printf("[finished reintegrating region %d]\n", regionID);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void ReintegrateLogs(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_ParityLog_t *logList)
|
||||
{
|
||||
RF_ParityLog_t *log, *freeLogList = NULL;
|
||||
RF_ParityLogData_t *logData, *logDataList;
|
||||
RF_RegionId_t regionID;
|
||||
|
||||
RF_ASSERT(logList);
|
||||
while (logList)
|
||||
{
|
||||
log = logList;
|
||||
logList = logList->next;
|
||||
log->next = NULL;
|
||||
regionID = log->regionID;
|
||||
ReintegrateRegion(raidPtr, regionID, log);
|
||||
log->numRecords = 0;
|
||||
|
||||
/* remove all items which are blocked on reintegration of this region */
|
||||
RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
|
||||
logDataList = logData;
|
||||
while (logData)
|
||||
{
|
||||
logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
|
||||
logData = logData->next;
|
||||
}
|
||||
RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
|
||||
/* process blocked log data and clear reintInProgress flag for this region */
|
||||
if (logDataList)
|
||||
rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
|
||||
else
|
||||
{
|
||||
/* Enable flushing for this region. Holding both locks provides
|
||||
a synchronization barrier with DumpParityLogToDisk
|
||||
*/
|
||||
RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
|
||||
RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
|
||||
RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
raidPtr->regionInfo[regionID].diskCount = 0;
|
||||
raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
|
||||
RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
|
||||
RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
|
||||
RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
}
|
||||
/* if log wasn't used, attach it to the list of logs to be returned */
|
||||
if (log)
|
||||
{
|
||||
log->next = freeLogList;
|
||||
freeLogList = log;
|
||||
}
|
||||
}
|
||||
if (freeLogList)
|
||||
rf_ReleaseParityLogs(raidPtr, freeLogList);
|
||||
}
|
||||
|
||||
/*
 * rf_ShutdownLogging -- shut down parity logging for an array.
 *
 * When rf_forceParityLogReint is set, every region (0 up to
 * rf_numParityRegions) is processed in turn:
 *   1) logging is disabled and the region's core log is detached, both
 *      under the region mutex;
 *   2) the region is reintegrated if it has a core log or a non-empty
 *      disk log, and the detached core log is then released.
 * Since parity log maps are volatile, all regions must be reintegrated.
 *
 * Always returns 0.
 */
int rf_ShutdownLogging(RF_Raid_t *raidPtr)
{
  RF_RegionId_t regionID;

  if (rf_parityLogDebug)
    printf("[shutting down parity logging]\n");

  if (rf_forceParityLogReint) {
    for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
      RF_ParityLog_t *coreLog;
      RF_SectorCount_t sectorsOnDisk;

      /* snapshot and detach the region's state under its mutex */
      RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
      raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
      coreLog = raidPtr->regionInfo[regionID].coreLog;
      raidPtr->regionInfo[regionID].coreLog = NULL;
      sectorsOnDisk = raidPtr->regionInfo[regionID].diskCount;
      RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);

      if (coreLog != NULL) {
        ReintegrateRegion(raidPtr, regionID, coreLog);
        rf_ReleaseParityLogs(raidPtr, coreLog);
      }
      else if (sectorsOnDisk > 0) {
        /* no core log, but the on-disk log still has to be folded in */
        ReintegrateRegion(raidPtr, regionID, coreLog);
      }
    }
  }

  if (rf_parityLogDebug) {
    printf("[parity logging disabled]\n");
    printf("[should be done!]\n");
  }
  return(0);
}
|
||||
|
||||
int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
|
||||
{
|
||||
RF_ParityLog_t *reintQueue, *flushQueue;
|
||||
int workNeeded, done = RF_FALSE;
|
||||
|
||||
rf_assign_threadid(); /* don't remove this line */
|
||||
|
||||
/* Main program for parity logging disk thread. This routine waits
|
||||
for work to appear in either the flush or reintegration queues
|
||||
and is responsible for flushing core logs to the log disk as
|
||||
well as reintegrating parity regions.
|
||||
|
||||
BLOCKING
|
||||
*/
|
||||
|
||||
RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
|
||||
/*
|
||||
* Inform our creator that we're running. Don't bother doing the
|
||||
* mutex lock/unlock dance- we locked above, and we'll unlock
|
||||
* below with nothing to do, yet.
|
||||
*/
|
||||
raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
|
||||
RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
|
||||
|
||||
/* empty the work queues */
|
||||
flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
|
||||
reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
|
||||
workNeeded = (flushQueue || reintQueue);
|
||||
|
||||
while (!done)
|
||||
{
|
||||
while (workNeeded)
|
||||
{
|
||||
/* First, flush all logs in the flush queue, freeing buffers
|
||||
Second, reintegrate all regions which are reported as full.
|
||||
Third, append queued log data until blocked.
|
||||
|
||||
Note: Incoming appends (ParityLogAppend) can block on either
|
||||
1. empty buffer pool
|
||||
2. region under reintegration
|
||||
To preserve a global FIFO ordering of appends, buffers are not
|
||||
released to the world until those appends blocked on buffers are
|
||||
removed from the append queue. Similarly, regions which are
|
||||
reintegrated are not opened for general use until the append
|
||||
queue has been emptied.
|
||||
*/
|
||||
|
||||
RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
|
||||
/* empty flushQueue, using free'd log buffers to process bufTail */
|
||||
if (flushQueue)
|
||||
FlushLogsToDisk(raidPtr, flushQueue);
|
||||
|
||||
/* empty reintQueue, flushing from reintTail as we go */
|
||||
if (reintQueue)
|
||||
ReintegrateLogs(raidPtr, reintQueue);
|
||||
|
||||
RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
|
||||
reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
|
||||
workNeeded = (flushQueue || reintQueue);
|
||||
}
|
||||
/* no work is needed at this point */
|
||||
if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE)
|
||||
{
|
||||
/* shutdown parity logging
|
||||
1. disable parity logging in all regions
|
||||
2. reintegrate all regions
|
||||
*/
|
||||
done = RF_TRUE; /* thread disabled, no work needed */
|
||||
RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
rf_ShutdownLogging(raidPtr);
|
||||
}
|
||||
if (!done)
|
||||
{
|
||||
/* thread enabled, no work needed, so sleep */
|
||||
if (rf_parityLogDebug)
|
||||
printf("[parity logging disk manager sleeping]\n");
|
||||
RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
|
||||
if (rf_parityLogDebug)
|
||||
printf("[parity logging disk manager just woke up]\n");
|
||||
flushQueue = raidPtr->parityLogDiskQueue.flushQueue; raidPtr->parityLogDiskQueue.flushQueue = NULL;
|
||||
reintQueue = raidPtr->parityLogDiskQueue.reintQueue; raidPtr->parityLogDiskQueue.reintQueue = NULL;
|
||||
workNeeded = (flushQueue || reintQueue);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Announce that we're done.
|
||||
*/
|
||||
RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
|
||||
RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
|
||||
RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
|
||||
#if defined(__NetBSD__) && defined(_KERNEL)
|
||||
/*
|
||||
* In the NetBSD kernel, the thread must exit; returning would
|
||||
* cause the proc trampoline to attempt to return to userspace.
|
||||
*/
|
||||
kthread_exit(0); /* does not return */
|
||||
#else
|
||||
return(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
|
|
@ -0,0 +1,62 @@
|
|||
/* $NetBSD: rf_paritylogDiskMgr.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* header file for parity log disk mgr code
|
||||
*
|
||||
* :
|
||||
* Log: rf_paritylogDiskMgr.h,v
|
||||
* Revision 1.5 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.4 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.3 1995/12/06 20:56:39 wvcii
|
||||
* added prototypes
|
||||
*
|
||||
* Revision 1.2 1995/11/30 16:06:21 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.1 1995/09/06 19:25:29 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_PARITYLOGDISKMGR_H_
|
||||
#define _RF__RF_PARITYLOGDISKMGR_H_
|
||||
|
||||
#include "rf_types.h"
|
||||
|
||||
/* Shut down parity logging for the array: when rf_forceParityLogReint is
 * set, disable logging in every region and reintegrate each region that
 * has a core log or a non-empty disk log.  Always returns 0. */
int rf_ShutdownLogging(RF_Raid_t *raidPtr);
|
||||
/* Main routine of the parity logging disk thread: flushes core logs and
 * reintegrates regions as work is queued, until RF_PLOG_TERMINATE is set
 * in the disk queue's threadState.  Returns 0, except in the NetBSD
 * kernel, where it ends in kthread_exit and does not return. */
int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr);
|
||||
|
||||
#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,136 @@
|
|||
/* $NetBSD: rf_paritylogging.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/* header file for Parity Logging */
|
||||
|
||||
/*
|
||||
* :
|
||||
* Log: rf_paritylogging.h,v
|
||||
* Revision 1.22 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.21 1996/07/13 00:00:59 jimz
|
||||
* sanitized generalized reconstruction architecture
|
||||
* cleaned up head sep, rbuf problems
|
||||
*
|
||||
* Revision 1.20 1996/06/10 11:55:47 jimz
|
||||
* Straightened out some per-array/not-per-array distinctions, fixed
|
||||
* a couple bugs related to confusion. Added shutdown lists. Removed
|
||||
* layout shutdown function (now subsumed by shutdown lists).
|
||||
*
|
||||
* Revision 1.19 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.18 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.17 1996/06/03 23:28:26 jimz
|
||||
* more bugfixes
|
||||
* check in tree to sync for IPDS runs with current bugfixes
|
||||
* there still may be a problem with threads in the script test
|
||||
* getting I/Os stuck- not trivially reproducible (runs ~50 times
|
||||
* in a row without getting stuck)
|
||||
*
|
||||
* Revision 1.16 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.15 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.14 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.13 1996/05/24 01:59:45 jimz
|
||||
* another checkpoint in code cleanup for release
|
||||
* time to sync kernel tree
|
||||
*
|
||||
* Revision 1.12 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.11 1995/12/06 20:56:25 wvcii
|
||||
* added prototypes
|
||||
*
|
||||
* Revision 1.10 1995/11/30 16:06:58 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.9 1995/11/17 19:53:08 wvcii
|
||||
* fixed bug in MapParityRegion prototype
|
||||
*
|
||||
* Revision 1.8 1995/11/17 19:09:24 wvcii
|
||||
* added prototyping to MapParity
|
||||
*
|
||||
* Revision 1.7 1995/11/07 15:28:17 wvcii
|
||||
* changed ParityLoggingDagSelect prototype
|
||||
* function no longer generates numHdrSucc, numTermAnt
|
||||
*
|
||||
* Revision 1.6 1995/07/07 00:16:50 wvcii
|
||||
* this version free from deadlock, fails parity verification
|
||||
*
|
||||
* Revision 1.5 1995/06/23 13:39:44 robby
|
||||
* updated to prototypes in rf_layout.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RF__RF_PARITYLOGGING_H_
|
||||
#define _RF__RF_PARITYLOGGING_H_
|
||||
|
||||
int rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
|
||||
RF_Config_t *cfgPtr);
|
||||
int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr);
|
||||
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr);
|
||||
RF_RegionId_t rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr,
|
||||
RF_SectorNum_t address);
|
||||
void rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
|
||||
int remap);
|
||||
void rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
|
||||
int remap);
|
||||
void rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
|
||||
RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col,
|
||||
RF_SectorNum_t *startSector);
|
||||
void rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
|
||||
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector,
|
||||
RF_SectorCount_t *numSector);
|
||||
void rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
|
||||
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
|
||||
void rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr,
|
||||
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
|
||||
RF_ReconUnitNum_t *which_ru);
|
||||
void rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
|
||||
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
|
||||
|
||||
#endif /* !_RF__RF_PARITYLOGGING_H_ */
|
|
@ -0,0 +1,751 @@
|
|||
/* $NetBSD: rf_parityloggingdags.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: William V. Courtright II
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log: rf_parityloggingdags.c,v
|
||||
* Revision 1.27 1996/07/28 20:31:39 jimz
|
||||
* i386netbsd port
|
||||
* true/false fixup
|
||||
*
|
||||
* Revision 1.26 1996/07/27 23:36:08 jimz
|
||||
* Solaris port of simulator
|
||||
*
|
||||
* Revision 1.25 1996/07/22 19:52:16 jimz
|
||||
* switched node params to RF_DagParam_t, a union of
|
||||
* a 64-bit int and a void *, for better portability
|
||||
* attempted hpux port, but failed partway through for
|
||||
* lack of a single C compiler capable of compiling all
|
||||
* source files
|
||||
*
|
||||
* Revision 1.24 1996/06/11 13:47:21 jimz
|
||||
* fix up for in-kernel compilation
|
||||
*
|
||||
* Revision 1.23 1996/06/07 22:26:27 jimz
|
||||
* type-ify which_ru (RF_ReconUnitNum_t)
|
||||
*
|
||||
* Revision 1.22 1996/06/07 21:33:04 jimz
|
||||
* begin using consistent types for sector numbers,
|
||||
* stripe numbers, row+col numbers, recon unit numbers
|
||||
*
|
||||
* Revision 1.21 1996/06/02 17:31:48 jimz
|
||||
* Moved a lot of global stuff into array structure, where it belongs.
|
||||
* Fixed up paritylogging, pss modules in this manner. Some general
|
||||
* code cleanup. Removed lots of dead code, some dead files.
|
||||
*
|
||||
* Revision 1.20 1996/05/31 22:26:54 jimz
|
||||
* fix a lot of mapping problems, memory allocation problems
|
||||
* found some weird lock issues, fixed 'em
|
||||
* more code cleanup
|
||||
*
|
||||
* Revision 1.19 1996/05/30 11:29:41 jimz
|
||||
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
|
||||
* about when stripes should be locked (I made it consistent: no parity, no lock)
|
||||
* There was a lot of extra serialization of I/Os which I've removed- a lot of
|
||||
* it was to calculate values for the cache code, which is no longer with us.
|
||||
* More types, function, macro cleanup. Added code to properly quiesce the array
|
||||
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
|
||||
* before. Fixed memory allocation, freeing bugs.
|
||||
*
|
||||
* Revision 1.18 1996/05/27 18:56:37 jimz
|
||||
* more code cleanup
|
||||
* better typing
|
||||
* compiles in all 3 environments
|
||||
*
|
||||
* Revision 1.17 1996/05/24 22:17:04 jimz
|
||||
* continue code + namespace cleanup
|
||||
* typed a bunch of flags
|
||||
*
|
||||
* Revision 1.16 1996/05/24 04:28:55 jimz
|
||||
* release cleanup ckpt
|
||||
*
|
||||
* Revision 1.15 1996/05/23 21:46:35 jimz
|
||||
* checkpoint in code cleanup (release prep)
|
||||
* lots of types, function names have been fixed
|
||||
*
|
||||
* Revision 1.14 1996/05/23 00:33:23 jimz
|
||||
* code cleanup: move all debug decls to rf_options.c, all extern
|
||||
* debug decls to rf_options.h, all debug vars preceded by rf_
|
||||
*
|
||||
* Revision 1.13 1996/05/18 19:51:34 jimz
|
||||
* major code cleanup- fix syntax, make some types consistent,
|
||||
* add prototypes, clean out dead code, et cetera
|
||||
*
|
||||
* Revision 1.12 1996/05/08 21:01:24 jimz
|
||||
* fixed up enum type names that were conflicting with other
|
||||
* enums and function names (ie, "panic")
|
||||
* future naming trends will be towards RF_ and rf_ for
|
||||
* everything raidframe-related
|
||||
*
|
||||
* Revision 1.11 1996/05/03 19:42:02 wvcii
|
||||
* added includes for dag library
|
||||
*
|
||||
* Revision 1.10 1995/12/12 18:10:06 jimz
|
||||
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
|
||||
* fix 80-column brain damage in comments
|
||||
*
|
||||
* Revision 1.9 1995/12/06 20:55:24 wvcii
|
||||
* added prototyping
|
||||
* fixed bug in dag header numSuccedents count for both small and large dags
|
||||
*
|
||||
* Revision 1.8 1995/11/30 16:08:01 wvcii
|
||||
* added copyright info
|
||||
*
|
||||
* Revision 1.7 1995/11/07 15:29:05 wvcii
|
||||
* reorganized code, adding comments and asserts
|
||||
* dag creation routines now generate term node
|
||||
* encoded commit point, barrier, and antecedence types into dags
|
||||
*
|
||||
* Revision 1.6 1995/09/07 15:52:06 jimz
|
||||
* noop compile when INCLUDE_PARITYLOGGING not defined
|
||||
*
|
||||
* Revision 1.5 1995/06/15 13:51:53 robby
|
||||
* updated some wrong prototypes (after prototyping rf_dagutils.h)
|
||||
*
|
||||
* Revision 1.4 1995/06/09 13:15:05 wvcii
|
||||
* code is now nonblocking
|
||||
*
|
||||
* Revision 1.3 95/05/31 13:09:14 wvcii
|
||||
* code debug
|
||||
*
|
||||
* Revision 1.2 1995/05/21 15:34:14 wvcii
|
||||
* code debug
|
||||
*
|
||||
* Revision 1.1 95/05/16 14:36:53 wvcii
|
||||
* Initial revision
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#include "rf_archs.h"
|
||||
|
||||
#if RF_INCLUDE_PARITYLOGGING > 0
|
||||
|
||||
/*
|
||||
DAGs specific to parity logging are created here
|
||||
*/
|
||||
|
||||
#include "rf_types.h"
|
||||
#include "rf_raid.h"
|
||||
#include "rf_dag.h"
|
||||
#include "rf_dagutils.h"
|
||||
#include "rf_dagfuncs.h"
|
||||
#include "rf_threadid.h"
|
||||
#include "rf_debugMem.h"
|
||||
#include "rf_paritylog.h"
|
||||
#include "rf_memchunk.h"
|
||||
#include "rf_general.h"
|
||||
|
||||
#include "rf_parityloggingdags.h"
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* creates a DAG to perform a large-write operation:
|
||||
*
|
||||
* / Rod \ / Wnd \
|
||||
* H -- NIL- Rod - NIL - Wnd ------ NIL - T
|
||||
* \ Rod / \ Xor - Lpo /
|
||||
*
|
||||
* The writes are not done until the reads complete because if they were done in
|
||||
* parallel, a failure on one of the reads could leave the parity in an inconsistent
|
||||
* state, so that the retry with a new DAG would produce erroneous parity.
|
||||
*
|
||||
* Note: this DAG has the nasty property that none of the buffers allocated for reading
|
||||
* old data can be freed until the XOR node fires. Need to fix this.
|
||||
*
|
||||
* The last two arguments are the number of faults tolerated, and function for the
|
||||
* redundancy calculation. The undo for the redundancy calc is assumed to be null
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
void rf_CommonCreateParityLoggingLargeWriteDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
int nfaults,
|
||||
int (*redFunc)(RF_DagNode_t *))
|
||||
{
|
||||
RF_DagNode_t *nodes, *wndNodes, *rodNodes=NULL, *syncNode, *xorNode, *lpoNode, *blockNode, *unblockNode, *termNode;
|
||||
int nWndNodes, nRodNodes, i;
|
||||
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
|
||||
RF_AccessStripeMapHeader_t *new_asm_h[2];
|
||||
int nodeNum, asmNum;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
char *sosBuffer, *eosBuffer;
|
||||
RF_PhysDiskAddr_t *pda;
|
||||
RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
|
||||
|
||||
if (rf_dagDebug)
|
||||
printf("[Creating parity-logging large-write DAG]\n");
|
||||
RF_ASSERT(nfaults == 1); /* this arch only single fault tolerant */
|
||||
dag_h->creator = "ParityLoggingLargeWriteDAG";
|
||||
|
||||
/* alloc the Wnd nodes, the xor node, and the Lpo node */
|
||||
nWndNodes = asmap->numStripeUnitsAccessed;
|
||||
RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
|
||||
i = 0;
|
||||
wndNodes = &nodes[i]; i += nWndNodes;
|
||||
xorNode = &nodes[i]; i += 1;
|
||||
lpoNode = &nodes[i]; i += 1;
|
||||
blockNode = &nodes[i]; i += 1;
|
||||
syncNode = &nodes[i]; i += 1;
|
||||
unblockNode = &nodes[i]; i += 1;
|
||||
termNode = &nodes[i]; i += 1;
|
||||
|
||||
dag_h->numCommitNodes = nWndNodes + 1;
|
||||
dag_h->numCommits = 0;
|
||||
dag_h->numSuccedents = 1;
|
||||
|
||||
rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
|
||||
if (nRodNodes > 0)
|
||||
RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
|
||||
|
||||
/* begin node initialization */
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList);
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
|
||||
/* initialize the Rod nodes */
|
||||
for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
|
||||
if (new_asm_h[asmNum]) {
|
||||
pda = new_asm_h[asmNum]->stripeMap->physInfo;
|
||||
while (pda) {
|
||||
rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc,rf_DiskReadUndoFunc,rf_GenericWakeupFunc,1,1,4,0, dag_h, "Rod", allocList);
|
||||
rodNodes[nodeNum].params[0].p = pda;
|
||||
rodNodes[nodeNum].params[1].p = pda->bufPtr;
|
||||
rodNodes[nodeNum].params[2].v = parityStripeID;
|
||||
rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
nodeNum++;
|
||||
pda=pda->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
RF_ASSERT(nodeNum == nRodNodes);
|
||||
|
||||
/* initialize the wnd nodes */
|
||||
pda = asmap->physInfo;
|
||||
for (i=0; i < nWndNodes; i++) {
|
||||
rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
|
||||
RF_ASSERT(pda != NULL);
|
||||
wndNodes[i].params[0].p = pda;
|
||||
wndNodes[i].params[1].p = pda->bufPtr;
|
||||
wndNodes[i].params[2].v = parityStripeID;
|
||||
wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
pda = pda->next;
|
||||
}
|
||||
|
||||
/* initialize the redundancy node */
|
||||
rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2*(nWndNodes+nRodNodes)+1, 1, dag_h, "Xr ", allocList);
|
||||
xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
|
||||
for (i=0; i < nWndNodes; i++) {
|
||||
xorNode->params[2*i+0] = wndNodes[i].params[0]; /* pda */
|
||||
xorNode->params[2*i+1] = wndNodes[i].params[1]; /* buf ptr */
|
||||
}
|
||||
for (i=0; i < nRodNodes; i++) {
|
||||
xorNode->params[2*(nWndNodes+i)+0] = rodNodes[i].params[0]; /* pda */
|
||||
xorNode->params[2*(nWndNodes+i)+1] = rodNodes[i].params[1]; /* buf ptr */
|
||||
}
|
||||
xorNode->params[2*(nWndNodes+nRodNodes)].p = raidPtr; /* xor node needs to get at RAID information */
|
||||
|
||||
/* look for an Rod node that reads a complete SU. If none, alloc a buffer to receive the parity info.
|
||||
* Note that we can't use a new data buffer because it will not have gotten written when the xor occurs.
|
||||
*/
|
||||
for (i = 0; i < nRodNodes; i++)
|
||||
if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
|
||||
break;
|
||||
if (i == nRodNodes) {
|
||||
RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
|
||||
}
|
||||
else {
|
||||
xorNode->results[0] = rodNodes[i].params[1].p;
|
||||
}
|
||||
|
||||
/* initialize the Lpo node */
|
||||
rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList);
|
||||
|
||||
lpoNode->params[0].p = asmap->parityInfo;
|
||||
lpoNode->params[1].p = xorNode->results[0];
|
||||
RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must describe entire parity unit */
|
||||
|
||||
/* connect nodes to form graph */
|
||||
|
||||
/* connect dag header to block node */
|
||||
RF_ASSERT(dag_h->numSuccedents == 1);
|
||||
RF_ASSERT(blockNode->numAntecedents == 0);
|
||||
dag_h->succedents[0] = blockNode;
|
||||
|
||||
/* connect the block node to the Rod nodes */
|
||||
RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1);
|
||||
for (i = 0; i < nRodNodes; i++) {
|
||||
RF_ASSERT(rodNodes[i].numAntecedents == 1);
|
||||
blockNode->succedents[i] = &rodNodes[i];
|
||||
rodNodes[i].antecedents[0] = blockNode;
|
||||
rodNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* connect the block node to the sync node */
|
||||
/* necessary if nRodNodes == 0 */
|
||||
RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1);
|
||||
blockNode->succedents[nRodNodes] = syncNode;
|
||||
syncNode->antecedents[0] = blockNode;
|
||||
syncNode->antType[0] = rf_control;
|
||||
|
||||
/* connect the Rod nodes to the syncNode */
|
||||
for (i = 0; i < nRodNodes; i++) {
|
||||
rodNodes[i].succedents[0] = syncNode;
|
||||
syncNode->antecedents[1 + i] = &rodNodes[i];
|
||||
syncNode->antType[1 + i] = rf_control;
|
||||
}
|
||||
|
||||
/* connect the sync node to the xor node */
|
||||
RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1);
|
||||
RF_ASSERT(xorNode->numAntecedents == 1);
|
||||
syncNode->succedents[0] = xorNode;
|
||||
xorNode->antecedents[0] = syncNode;
|
||||
xorNode->antType[0] = rf_trueData; /* carry forward from sync */
|
||||
|
||||
/* connect the sync node to the Wnd nodes */
|
||||
for (i = 0; i < nWndNodes; i++) {
|
||||
RF_ASSERT(wndNodes->numAntecedents == 1);
|
||||
syncNode->succedents[1 + i] = &wndNodes[i];
|
||||
wndNodes[i].antecedents[0] = syncNode;
|
||||
wndNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* connect the xor node to the Lpo node */
|
||||
RF_ASSERT(xorNode->numSuccedents == 1);
|
||||
RF_ASSERT(lpoNode->numAntecedents == 1);
|
||||
xorNode->succedents[0] = lpoNode;
|
||||
lpoNode->antecedents[0]= xorNode;
|
||||
lpoNode->antType[0] = rf_trueData;
|
||||
|
||||
/* connect the Wnd nodes to the unblock node */
|
||||
RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1);
|
||||
for (i = 0; i < nWndNodes; i++) {
|
||||
RF_ASSERT(wndNodes->numSuccedents == 1);
|
||||
wndNodes[i].succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[i] = &wndNodes[i];
|
||||
unblockNode->antType[i] = rf_control;
|
||||
}
|
||||
|
||||
/* connect the Lpo node to the unblock node */
|
||||
RF_ASSERT(lpoNode->numSuccedents == 1);
|
||||
lpoNode->succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[nWndNodes] = lpoNode;
|
||||
unblockNode->antType[nWndNodes] = rf_control;
|
||||
|
||||
/* connect unblock node to terminator */
|
||||
RF_ASSERT(unblockNode->numSuccedents == 1);
|
||||
RF_ASSERT(termNode->numAntecedents == 1);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
unblockNode->succedents[0] = termNode;
|
||||
termNode->antecedents[0] = unblockNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows:
|
||||
*
|
||||
* Header
|
||||
* |
|
||||
* Block
|
||||
* / | ... \ \
|
||||
* / | \ \
|
||||
* Rod Rod Rod Rop
|
||||
* | \ /| \ / | \/ |
|
||||
* | | | /\ |
|
||||
* Wnd Wnd Wnd X
|
||||
* | \ / |
|
||||
* | \ / |
|
||||
* \ \ / Lpo
|
||||
* \ \ / /
|
||||
* +-> Unblock <-+
|
||||
* |
|
||||
* T
|
||||
*
|
||||
*
|
||||
* R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
|
||||
* When the access spans a stripe unit boundary and is less than one SU in size, there will
|
||||
* be two Rop -- X -- Wnp branches. I call this the "double-XOR" case.
|
||||
* The second output from each Rod node goes to the X node. In the double-XOR
|
||||
* case, there are exactly 2 Rod nodes, and each sends one output to one X node.
|
||||
* There is one Rod -- Wnd -- T branch for each stripe unit being updated.
|
||||
*
|
||||
* The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG.
|
||||
*
|
||||
* Note: this DAG ignores all the optimizations related to making the RMWs atomic.
|
||||
* it also has the nasty property that none of the buffers allocated for reading
|
||||
* old data & parity can be freed until the XOR node fires. Need to fix this.
|
||||
*
|
||||
* A null qfuncs indicates single fault tolerant
|
||||
*****************************************************************************/
|
||||
|
||||
void rf_CommonCreateParityLoggingSmallWriteDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
RF_RedFuncs_t *pfuncs,
|
||||
RF_RedFuncs_t *qfuncs)
|
||||
{
|
||||
RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes;
|
||||
RF_DagNode_t *readDataNodes, *readParityNodes;
|
||||
RF_DagNode_t *writeDataNodes, *lpuNodes;
|
||||
RF_DagNode_t *unlockDataNodes=NULL, *termNode;
|
||||
RF_PhysDiskAddr_t *pda = asmap->physInfo;
|
||||
int numDataNodes = asmap->numStripeUnitsAccessed;
|
||||
int numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
|
||||
int i, j, nNodes, totalNumNodes;
|
||||
RF_ReconUnitNum_t which_ru;
|
||||
int (*func)(RF_DagNode_t *node), (*undoFunc)(RF_DagNode_t *node);
|
||||
int (*qfunc)(RF_DagNode_t *node);
|
||||
char *name, *qname;
|
||||
RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
|
||||
long nfaults = qfuncs ? 2 : 1;
|
||||
int lu_flag = (rf_enableAtomicRMW) ? 1 : 0; /* lock/unlock flag */
|
||||
|
||||
if (rf_dagDebug) printf("[Creating parity-logging small-write DAG]\n");
|
||||
RF_ASSERT(numDataNodes > 0);
|
||||
RF_ASSERT(nfaults == 1);
|
||||
dag_h->creator = "ParityLoggingSmallWriteDAG";
|
||||
|
||||
/* DAG creation occurs in three steps:
|
||||
1. count the number of nodes in the DAG
|
||||
2. create the nodes
|
||||
3. initialize the nodes
|
||||
4. connect the nodes
|
||||
*/
|
||||
|
||||
/* Step 1. compute number of nodes in the graph */
|
||||
|
||||
/* number of nodes:
|
||||
a read and write for each data unit
|
||||
a redundancy computation node for each parity node
|
||||
a read and Lpu for each parity unit
|
||||
a block and unblock node (2)
|
||||
a terminator node
|
||||
if atomic RMW
|
||||
an unlock node for each data unit, redundancy unit
|
||||
*/
|
||||
totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3;
|
||||
if (lu_flag)
|
||||
totalNumNodes += numDataNodes;
|
||||
|
||||
nNodes = numDataNodes + numParityNodes;
|
||||
|
||||
dag_h->numCommitNodes = numDataNodes + numParityNodes;
|
||||
dag_h->numCommits = 0;
|
||||
dag_h->numSuccedents = 1;
|
||||
|
||||
/* Step 2. create the nodes */
|
||||
RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
|
||||
i = 0;
|
||||
blockNode = &nodes[i]; i += 1;
|
||||
unblockNode = &nodes[i]; i += 1;
|
||||
readDataNodes = &nodes[i]; i += numDataNodes;
|
||||
readParityNodes = &nodes[i]; i += numParityNodes;
|
||||
writeDataNodes = &nodes[i]; i += numDataNodes;
|
||||
lpuNodes = &nodes[i]; i += numParityNodes;
|
||||
xorNodes = &nodes[i]; i += numParityNodes;
|
||||
termNode = &nodes[i]; i += 1;
|
||||
if (lu_flag) {
|
||||
unlockDataNodes = &nodes[i]; i += numDataNodes;
|
||||
}
|
||||
RF_ASSERT(i == totalNumNodes);
|
||||
|
||||
/* Step 3. initialize the nodes */
|
||||
/* initialize block node (Nil) */
|
||||
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);
|
||||
|
||||
/* initialize unblock node (Nil) */
|
||||
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList);
|
||||
|
||||
/* initialize terminatory node (Trm) */
|
||||
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
|
||||
|
||||
/* initialize nodes which read old data (Rod) */
|
||||
for (i = 0; i < numDataNodes; i++) {
|
||||
rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList);
|
||||
RF_ASSERT(pda != NULL);
|
||||
readDataNodes[i].params[0].p = pda; /* physical disk addr desc */
|
||||
readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old data */
|
||||
readDataNodes[i].params[2].v = parityStripeID;
|
||||
readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
|
||||
pda=pda->next;
|
||||
readDataNodes[i].propList[0] = NULL;
|
||||
readDataNodes[i].propList[1] = NULL;
|
||||
}
|
||||
|
||||
/* initialize nodes which read old parity (Rop) */
|
||||
pda = asmap->parityInfo; i = 0;
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
RF_ASSERT(pda != NULL);
|
||||
rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList);
|
||||
readParityNodes[i].params[0].p = pda;
|
||||
readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList); /* buffer to hold old parity */
|
||||
readParityNodes[i].params[2].v = parityStripeID;
|
||||
readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
readParityNodes[i].propList[0] = NULL;
|
||||
pda=pda->next;
|
||||
}
|
||||
|
||||
/* initialize nodes which write new data (Wnd) */
|
||||
pda = asmap->physInfo;
|
||||
for (i=0; i < numDataNodes; i++) {
|
||||
RF_ASSERT(pda != NULL);
|
||||
rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList);
|
||||
writeDataNodes[i].params[0].p = pda; /* physical disk addr desc */
|
||||
writeDataNodes[i].params[1].p = pda->bufPtr; /* buffer holding new data to be written */
|
||||
writeDataNodes[i].params[2].v = parityStripeID;
|
||||
writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
|
||||
|
||||
if (lu_flag) {
|
||||
/* initialize node to unlock the disk queue */
|
||||
rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList);
|
||||
unlockDataNodes[i].params[0].p = pda; /* physical disk addr desc */
|
||||
unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
|
||||
}
|
||||
pda = pda->next;
|
||||
}
|
||||
|
||||
|
||||
/* initialize nodes which compute new parity */
|
||||
/* we use the simple XOR func in the double-XOR case, and when we're accessing only a portion of one stripe unit.
|
||||
* the distinction between the two is that the regular XOR func assumes that the targbuf is a full SU in size,
|
||||
* and examines the pda associated with the buffer to decide where within the buffer to XOR the data, whereas
|
||||
* the simple XOR func just XORs the data into the start of the buffer.
|
||||
*/
|
||||
if ((numParityNodes==2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
|
||||
func = pfuncs->simple; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->SimpleName;
|
||||
if (qfuncs)
|
||||
{ qfunc = qfuncs->simple; qname = qfuncs->SimpleName;}
|
||||
} else {
|
||||
func = pfuncs->regular; undoFunc = rf_NullNodeUndoFunc; name = pfuncs->RegularName;
|
||||
if (qfuncs) { qfunc = qfuncs->regular; qname = qfuncs->RegularName;}
|
||||
}
|
||||
/* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} nodes, and raidPtr */
|
||||
if (numParityNodes==2) { /* double-xor case */
|
||||
for (i=0; i < numParityNodes; i++) {
|
||||
rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList); /* no wakeup func for xor */
|
||||
xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
|
||||
xorNodes[i].params[0] = readDataNodes[i].params[0];
|
||||
xorNodes[i].params[1] = readDataNodes[i].params[1];
|
||||
xorNodes[i].params[2] = readParityNodes[i].params[0];
|
||||
xorNodes[i].params[3] = readParityNodes[i].params[1];
|
||||
xorNodes[i].params[4] = writeDataNodes[i].params[0];
|
||||
xorNodes[i].params[5] = writeDataNodes[i].params[1];
|
||||
xorNodes[i].params[6].p = raidPtr;
|
||||
xorNodes[i].results[0] = readParityNodes[i].params[1].p; /* use old parity buf as target buf */
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* there is only one xor node in this case */
|
||||
rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
|
||||
xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
|
||||
for (i=0; i < numDataNodes + 1; i++) {
|
||||
/* set up params related to Rod and Rop nodes */
|
||||
xorNodes[0].params[2*i+0] = readDataNodes[i].params[0]; /* pda */
|
||||
xorNodes[0].params[2*i+1] = readDataNodes[i].params[1]; /* buffer pointer */
|
||||
}
|
||||
for (i=0; i < numDataNodes; i++) {
|
||||
/* set up params related to Wnd and Wnp nodes */
|
||||
xorNodes[0].params[2*(numDataNodes+1+i)+0] = writeDataNodes[i].params[0]; /* pda */
|
||||
xorNodes[0].params[2*(numDataNodes+1+i)+1] = writeDataNodes[i].params[1]; /* buffer pointer */
|
||||
}
|
||||
xorNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr; /* xor node needs to get at RAID information */
|
||||
xorNodes[0].results[0] = readParityNodes[0].params[1].p;
|
||||
}
|
||||
|
||||
/* initialize the log node(s) */
|
||||
pda = asmap->parityInfo;
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
RF_ASSERT(pda);
|
||||
rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList);
|
||||
lpuNodes[i].params[0].p = pda; /* PhysDiskAddr of parity */
|
||||
lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to parity */
|
||||
pda = pda->next;
|
||||
}
|
||||
|
||||
|
||||
/* Step 4. connect the nodes */
|
||||
|
||||
/* connect header to block node */
|
||||
RF_ASSERT(dag_h->numSuccedents == 1);
|
||||
RF_ASSERT(blockNode->numAntecedents == 0);
|
||||
dag_h->succedents[0] = blockNode;
|
||||
|
||||
/* connect block node to read old data nodes */
|
||||
RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes));
|
||||
for (i = 0; i < numDataNodes; i++) {
|
||||
blockNode->succedents[i] = &readDataNodes[i];
|
||||
RF_ASSERT(readDataNodes[i].numAntecedents == 1);
|
||||
readDataNodes[i].antecedents[0]= blockNode;
|
||||
readDataNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* connect block node to read old parity nodes */
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
|
||||
RF_ASSERT(readParityNodes[i].numAntecedents == 1);
|
||||
readParityNodes[i].antecedents[0] = blockNode;
|
||||
readParityNodes[i].antType[0] = rf_control;
|
||||
}
|
||||
|
||||
/* connect read old data nodes to write new data nodes */
|
||||
for (i = 0; i < numDataNodes; i++) {
|
||||
RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes);
|
||||
for (j = 0; j < numDataNodes; j++) {
|
||||
RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes);
|
||||
readDataNodes[i].succedents[j] = &writeDataNodes[j];
|
||||
writeDataNodes[j].antecedents[i] = &readDataNodes[i];
|
||||
if (i == j)
|
||||
writeDataNodes[j].antType[i] = rf_antiData;
|
||||
else
|
||||
writeDataNodes[j].antType[i] = rf_control;
|
||||
}
|
||||
}
|
||||
|
||||
/* connect read old data nodes to xor nodes */
|
||||
for (i = 0; i < numDataNodes; i++)
|
||||
for (j = 0; j < numParityNodes; j++){
|
||||
RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
|
||||
readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
|
||||
xorNodes[j].antecedents[i] = &readDataNodes[i];
|
||||
xorNodes[j].antType[i] = rf_trueData;
|
||||
}
|
||||
|
||||
/* connect read old parity nodes to write new data nodes */
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes);
|
||||
for (j = 0; j < numDataNodes; j++) {
|
||||
readParityNodes[i].succedents[j] = &writeDataNodes[j];
|
||||
writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
|
||||
writeDataNodes[j].antType[numDataNodes + i] = rf_control;
|
||||
}
|
||||
}
|
||||
|
||||
/* connect read old parity nodes to xor nodes */
|
||||
for (i = 0; i < numParityNodes; i++)
|
||||
for (j = 0; j < numParityNodes; j++) {
|
||||
readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
|
||||
xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
|
||||
xorNodes[j].antType[numDataNodes + i] = rf_trueData;
|
||||
}
|
||||
|
||||
/* connect xor nodes to write new parity nodes */
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
RF_ASSERT(xorNodes[i].numSuccedents == 1);
|
||||
RF_ASSERT(lpuNodes[i].numAntecedents == 1);
|
||||
xorNodes[i].succedents[0] = &lpuNodes[i];
|
||||
lpuNodes[i].antecedents[0] = &xorNodes[i];
|
||||
lpuNodes[i].antType[0] = rf_trueData;
|
||||
}
|
||||
|
||||
for (i = 0; i < numDataNodes; i++) {
|
||||
if (lu_flag) {
|
||||
/* connect write new data nodes to unlock nodes */
|
||||
RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
|
||||
RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
|
||||
writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
|
||||
unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
|
||||
unlockDataNodes[i].antType[0] = rf_control;
|
||||
|
||||
/* connect unlock nodes to unblock node */
|
||||
RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
|
||||
RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
|
||||
unlockDataNodes[i].succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[i] = &unlockDataNodes[i];
|
||||
unblockNode->antType[i] = rf_control;
|
||||
}
|
||||
else {
|
||||
/* connect write new data nodes to unblock node */
|
||||
RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
|
||||
RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
|
||||
writeDataNodes[i].succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[i] = &writeDataNodes[i];
|
||||
unblockNode->antType[i] = rf_control;
|
||||
}
|
||||
}
|
||||
|
||||
/* connect write new parity nodes to unblock node */
|
||||
for (i = 0; i < numParityNodes; i++) {
|
||||
RF_ASSERT(lpuNodes[i].numSuccedents == 1);
|
||||
lpuNodes[i].succedents[0] = unblockNode;
|
||||
unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i];
|
||||
unblockNode->antType[numDataNodes + i] = rf_control;
|
||||
}
|
||||
|
||||
/* connect unblock node to terminator */
|
||||
RF_ASSERT(unblockNode->numSuccedents == 1);
|
||||
RF_ASSERT(termNode->numAntecedents == 1);
|
||||
RF_ASSERT(termNode->numSuccedents == 0);
|
||||
unblockNode->succedents[0] = termNode;
|
||||
termNode->antecedents[0] = unblockNode;
|
||||
termNode->antType[0] = rf_control;
|
||||
}
|
||||
|
||||
|
||||
void rf_CreateParityLoggingSmallWriteDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
RF_RedFuncs_t *pfuncs,
|
||||
RF_RedFuncs_t *qfuncs)
|
||||
{
|
||||
dag_h->creator = "ParityLoggingSmallWriteDAG";
|
||||
rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL);
|
||||
}
|
||||
|
||||
|
||||
void rf_CreateParityLoggingLargeWriteDAG(
|
||||
RF_Raid_t *raidPtr,
|
||||
RF_AccessStripeMap_t *asmap,
|
||||
RF_DagHeader_t *dag_h,
|
||||
void *bp,
|
||||
RF_RaidAccessFlags_t flags,
|
||||
RF_AllocListElem_t *allocList,
|
||||
int nfaults,
|
||||
int (*redFunc)(RF_DagNode_t *))
|
||||
{
|
||||
dag_h->creator = "ParityLoggingSmallWriteDAG";
|
||||
rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc);
|
||||
}
|
||||
|
||||
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue