RAIDframe, version 1.1, from the Parallel Data Laboratory at

Carnegie Mellon University.  Full RAID implementation, including
levels 0, 1, 4, 5, 6, parity logging, and a few other goodies.
Ported to NetBSD by Greg Oster.
This commit is contained in:
oster 1998-11-13 04:20:26 +00:00
parent 2f3f9379cf
commit 38a3987b69
153 changed files with 53895 additions and 0 deletions

View File

@ -0,0 +1,294 @@
/* $NetBSD: rf_acctrace.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************
*
* acctrace.c -- code to support collecting information about each access
*
*****************************************************************************/
/* :
* Log: rf_acctrace.c,v
* Revision 1.29 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.28 1996/07/17 21:00:58 jimz
* clean up timer interface, tracing
*
* Revision 1.27 1996/06/14 14:35:24 jimz
* clean up dfstrace protection
*
* Revision 1.26 1996/06/13 19:09:04 jimz
* remove trace.dat file before beginning
*
* Revision 1.25 1996/06/12 04:41:26 jimz
* tweaks to make genplot work with user-level driver
* (mainly change stat collection)
*
* Revision 1.24 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.23 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.22 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.21 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.20 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.19 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.18 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.17 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.16 1996/05/20 16:15:49 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.15 1996/05/18 20:10:00 jimz
* bit of cleanup to compile cleanly in kernel, once again
*
* Revision 1.14 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.13 1995/11/30 16:26:43 wvcii
* added copyright info
*
*/
#ifdef _KERNEL
#define KERNEL
#endif
#include "rf_threadstuff.h"
#include "rf_types.h"
#include <sys/stat.h>
#include <sys/types.h>
#ifdef KERNEL
#ifndef __NetBSD__
#include <dfstrace.h>
#endif /* !__NetBSD__ */
#if DFSTRACE > 0
#include <sys/dfs_log.h>
#include <sys/dfstracebuf.h>
#endif /* DFSTRACE > 0 */
#endif /* KERNEL */
#include "rf_debugMem.h"
#include "rf_acctrace.h"
#include "rf_general.h"
#include "rf_raid.h"
#include "rf_etimer.h"
#include "rf_hist.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
/* Count of trace records buffered so far; rf_LogTraceRec stops recording once this reaches a non-negative rf_maxNumTraces. */
static long numTracesSoFar;
/* Number of entries currently held in access_tracebuf; reset to 0 by rf_FlushAccessTraceBuf. */
static int accessTraceBufCount = 0;
/* Trace buffer of rf_accessTraceBufSize entries, allocated in rf_ConfigureAccessTrace when that size is non-zero. */
static RF_AccTraceEntry_t *access_tracebuf;
/* Running index stamped into each record in the kernel DFSTRACE build (see rf_LogTraceRec). */
static long traceCount;
/* When non-zero, rf_LogTraceRec returns without recording anything. */
int rf_stopCollectingTraces;
/* Held by rf_LogTraceRec while it appends to (and possibly flushes) the trace buffer. */
RF_DECLARE_MUTEX(rf_tracing_mutex)
/* Descriptor of the "trace.dat" output file; only opened and used in non-KERNEL builds. */
int rf_trace_fd;
static void rf_ShutdownAccessTrace(void *);

/*
 * Shutdown-list hook registered by rf_ConfigureAccessTrace.
 *
 * Flushes any buffered trace entries, releases the trace buffer (and, at
 * user level, closes the trace file), and destroys rf_tracing_mutex.
 *
 * ignored: unused shutdown-list argument.
 */
static void rf_ShutdownAccessTrace(ignored)
	void *ignored;
{
	if (rf_accessTraceBufSize && accessTraceBufCount)
		rf_FlushAccessTraceBuf();

	/*
	 * Test rf_accessTraceBufSize again after the flush: when the user-level
	 * write fails, rf_FlushAccessTraceBuf itself frees the buffer, closes
	 * the file and zeroes rf_accessTraceBufSize.  Releasing them a second
	 * time here would be a double free / double close.
	 */
	if (rf_accessTraceBufSize) {
#ifndef KERNEL
		close(rf_trace_fd);
#endif /* !KERNEL */
		RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
	}
	rf_mutex_destroy(&rf_tracing_mutex);
#if defined(KERNEL) && DFSTRACE > 0
	/* traceCount is a long, so it must be printed with %ld */
	printf("RAIDFRAME: %ld trace entries were sent to dfstrace\n", traceCount);
#endif /* KERNEL && DFSTRACE > 0 */
}
static void rf_ReleaseAccessTraceBuf(int);

/*
 * Undo the resource acquisition done by rf_ConfigureAccessTrace: free the
 * trace buffer and, at user level, close the trace file when file_open is
 * non-zero.  Does nothing when tracing is disabled (buffer size of 0).
 */
static void rf_ReleaseAccessTraceBuf(file_open)
	int file_open;
{
	if (!rf_accessTraceBufSize)
		return;
#ifndef KERNEL
	if (file_open)
		close(rf_trace_fd);
#endif /* !KERNEL */
	RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
}

/*
 * Initialise access tracing.
 *
 * Resets the trace counters, allocates the trace buffer when
 * rf_accessTraceBufSize is non-zero (and, at user level, recreates
 * "trace.dat"), initialises rf_tracing_mutex and registers
 * rf_ShutdownAccessTrace on the shutdown list.
 *
 * listp: shutdown list to which the cleanup hook is added.
 *
 * Returns 0 on success.  On failure returns the errno value of the failing
 * file operation, or the code returned by rf_mutex_init / rf_ShutdownCreate;
 * everything acquired up to that point has been released again.
 */
int rf_ConfigureAccessTrace(listp)
	RF_ShutdownList_t **listp;
{
	int rc;

	numTracesSoFar = accessTraceBufCount = rf_stopCollectingTraces = 0;
	traceCount = 0;

	if (rf_accessTraceBufSize) {
		RF_Malloc(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
#ifndef KERNEL
		rc = unlink("trace.dat");
		if (rc && (errno != ENOENT)) {
			/* capture errno before the reporting calls can overwrite it */
			rc = errno;
			perror("unlink");
			RF_ERRORMSG("Unable to remove existing trace.dat\n");
			rf_ReleaseAccessTraceBuf(0);
			return(rc);
		}
		if ((rf_trace_fd = open("trace.dat",O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0 ) {
			rc = errno;
			perror("Unable to open trace.dat for output");
			rf_ReleaseAccessTraceBuf(0);
			return(rc);
		}
#endif /* !KERNEL */
	}

	rc = rf_mutex_init(&rf_tracing_mutex);
	if (rc) {
		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
			__LINE__, rc);
		/* do not carry on with an unusable mutex */
		rf_ReleaseAccessTraceBuf(1);
		return(rc);
	}

	rc = rf_ShutdownCreate(listp, rf_ShutdownAccessTrace, NULL);
	if (rc) {
		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
			__LINE__, rc);
		/* the mutex exists whether or not a trace buffer was configured */
		rf_mutex_destroy(&rf_tracing_mutex);
		rf_ReleaseAccessTraceBuf(1);
		return(rc);
	}
	return(0);
}
/*
 * Install a trace record and fold it into the array's running totals.
 *
 * raid: array whose acc_totals are updated (only if raid->keep_acc_totals
 *       is set).
 * rec:  the completed trace entry.  In the kernel DFSTRACE build its index
 *       field is overwritten with the running trace count.
 *
 * Nothing is recorded when rf_stopCollectingTraces is set or when a
 * non-negative rf_maxNumTraces has been reached.
 *
 * User level: the record is copied into access_tracebuf under
 * rf_tracing_mutex, and the buffer is flushed once it is completely full.
 * Kernel with DFSTRACE: the record is handed to dfs_log when RAIDframe
 * tracing is switched on.
 */
void rf_LogTraceRec(raid, rec)
	RF_Raid_t *raid;
	RF_AccTraceEntry_t *rec;
{
	RF_AccTotals_t *acc = &raid->acc_totals;

	if (rf_stopCollectingTraces || ((rf_maxNumTraces >= 0) && (numTracesSoFar >= rf_maxNumTraces)))
		return;

#ifndef KERNEL
	if (rf_accessTraceBufSize) {
		RF_LOCK_MUTEX(rf_tracing_mutex);
		numTracesSoFar++;
		bcopy((char *)rec, (char *)&access_tracebuf[ accessTraceBufCount++ ], sizeof(RF_AccTraceEntry_t));
		if (accessTraceBufCount == rf_accessTraceBufSize)
			rf_FlushAccessTraceBuf();
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif /* !KERNEL */

#if defined(KERNEL) && DFSTRACE > 0
	rec->index = traceCount++;
	if (traceon & DFS_TRACE_RAIDFRAME) {
		dfs_log(DFS_NOTE, (char *) rec, (int) sizeof(*rec), 0);
	}
#endif /* KERNEL && DFSTRACE > 0 */

	/* update AccTotals for this device */
	if (!raid->keep_acc_totals)
		return;

	acc->num_log_ents++;
	if (rec->reconacc) {
		acc->recon_start_to_fetch_us += rec->specific.recon.recon_start_to_fetch_us;
		acc->recon_fetch_to_return_us += rec->specific.recon.recon_fetch_to_return_us;
		acc->recon_return_to_submit_us += rec->specific.recon.recon_return_to_submit_us;
		acc->recon_num_phys_ios += rec->num_phys_ios;
		acc->recon_phys_io_us += rec->phys_io_us;
		acc->recon_diskwait_us += rec->diskwait_us;
		acc->recon_reccount++;
	}
	else {
		RF_HIST_ADD(acc->tot_hist, rec->total_us);
		RF_HIST_ADD(acc->dw_hist, rec->diskwait_us);
		/* count of physical ios which are too big. often due to thermal recalibration */
		/* if bigvals > 0, you should probably ignore this data set */
		if (rec->diskwait_us > 100000)
			acc->bigvals++;
		acc->total_us += rec->total_us;
		acc->suspend_ovhd_us += rec->specific.user.suspend_ovhd_us;
		acc->map_us += rec->specific.user.map_us;
		acc->lock_us += rec->specific.user.lock_us;
		acc->dag_create_us += rec->specific.user.dag_create_us;
		acc->dag_retry_us += rec->specific.user.dag_retry_us;
		acc->exec_us += rec->specific.user.exec_us;
		acc->cleanup_us += rec->specific.user.cleanup_us;
		acc->exec_engine_us += rec->specific.user.exec_engine_us;
		acc->xor_us += rec->xor_us;
		acc->q_us += rec->q_us;
		acc->plog_us += rec->plog_us;
		acc->diskqueue_us += rec->diskqueue_us;
		acc->diskwait_us += rec->diskwait_us;
		acc->num_phys_ios += rec->num_phys_ios;
		/* accumulate like every other total (and like recon_phys_io_us above) rather than overwrite */
		acc->phys_io_us += rec->phys_io_us;
		acc->user_reccount++;
	}
}
/*
 * Empty the access trace buffer.  The caller is expected to hold
 * rf_tracing_mutex.
 *
 * User level: all buffered entries are written to rf_trace_fd with a single
 * write().  If fewer bytes than requested are written, tracing is disabled:
 * the buffer is freed, rf_accessTraceBufSize is set to 0 and the file is
 * closed.
 * Kernel: nothing is written here; only the buffer count is reset.
 */
void rf_FlushAccessTraceBuf()
{
#ifndef KERNEL
	int nbytes;
	int incomplete;

	nbytes = accessTraceBufCount * sizeof(RF_AccTraceEntry_t);
	incomplete = (write(rf_trace_fd, (char *) access_tracebuf, nbytes) < nbytes);
	if (incomplete) {
		fprintf(stderr, "Unable to write traces to file. tracing disabled\n");
		RF_Free(access_tracebuf, rf_accessTraceBufSize * sizeof(RF_AccTraceEntry_t));
		rf_accessTraceBufSize = 0;
		close(rf_trace_fd);
	}
#endif /* !KERNEL */
	/* the buffer is considered empty whether or not the write succeeded */
	accessTraceBufCount = 0;
}

View File

@ -0,0 +1,195 @@
/* $NetBSD: rf_acctrace.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************
*
* acctrace.h -- header file for acctrace.c
*
*****************************************************************************/
/* :
*
* Log: rf_acctrace.h,v
* Revision 1.32 1996/08/02 15:12:38 jimz
* remove dead code
*
* Revision 1.31 1996/07/27 14:34:39 jimz
* remove bogus semicolon
*
* Revision 1.30 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.29 1996/07/17 21:00:58 jimz
* clean up timer interface, tracing
*
* Revision 1.28 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.27 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
* /
*
* Revision 1.26 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.25 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.24 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.23 1996/05/28 12:34:30 jimz
* nail down size of reconacc
*
* Revision 1.22 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.21 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.20 1996/05/02 14:57:24 jimz
* change to boolean_t
*
* Revision 1.19 1995/12/14 18:37:06 jimz
* convert to rf_types.h types
*
* Revision 1.18 1995/11/30 16:26:49 wvcii
* added copyright info
*
* Revision 1.17 1995/09/30 19:49:23 jimz
* add AccTotals structure, for capturing totals in kernel
*
* Revision 1.16 1995/09/12 00:20:55 wvcii
* added support for tracing disk queue time
*
* Revision 1.15 95/09/06 19:23:12 wvcii
* increased MAX_IOS_PER_TRACE_ENTRY from 1 to 4
*
*/
#ifndef _RF__RF_ACCTRACE_H_
#define _RF__RF_ACCTRACE_H_
#include "rf_types.h"
#include "rf_hist.h"
#include "rf_etimer.h"
/*
 * Timing breakdown for a user access; this is the "user" arm of the
 * "specific" union in RF_AccTraceEntry_t.  All fields are in microseconds.
 */
typedef struct RF_user_acc_stats_s {
RF_uint64 suspend_ovhd_us; /* us spent mucking in the access-suspension code */
RF_uint64 map_us; /* us spent mapping the access */
RF_uint64 lock_us; /* us spent locking & unlocking stripes, including time spent blocked */
RF_uint64 dag_create_us; /* us spent creating the DAGs */
RF_uint64 dag_retry_us; /* _total_ us spent retrying the op -- not broken down into components */
RF_uint64 exec_us; /* us spent in DispatchDAG */
RF_uint64 exec_engine_us; /* us spent in engine, not including blocking time */
RF_uint64 cleanup_us; /* us spent tearing down the dag & maps, and generally cleaning up */
} RF_user_acc_stats_t;
/*
 * Timing breakdown for a reconstruction access; this is the "recon" arm of
 * the "specific" union in RF_AccTraceEntry_t.  The fields are 32 bits wide
 * here but are summed into 64-bit fields of RF_AccTotals_t.
 * NOTE(review): the _us suffix suggests microseconds, as in the user stats;
 * the exact phase boundaries are not visible in this file -- confirm.
 */
typedef struct RF_recon_acc_stats_s {
RF_uint32 recon_start_to_fetch_us;
RF_uint32 recon_fetch_to_return_us;
RF_uint32 recon_return_to_submit_us;
} RF_recon_acc_stats_t;
/*
 * One trace record describing a single access.  rf_LogTraceRec copies whole
 * records into the trace buffer (or passes them to dfs_log), so the field
 * layout is part of the on-disk trace format.
 */
typedef struct RF_acctrace_entry_s {
union {
RF_user_acc_stats_t user;
RF_recon_acc_stats_t recon;
} specific; /* which arm is valid is selected by reconacc below */
RF_uint8 reconacc; /* whether this is a tracerec for a user acc or a recon acc */
RF_uint64 xor_us; /* us spent doing XORs */
RF_uint64 q_us; /* us spent on Q (presumably the Q redundancy computation of P+Q -- TODO confirm) */
RF_uint64 plog_us; /* us spent waiting to stuff parity into log */
RF_uint64 diskqueue_us; /* _total_ us spent in disk queue(s), incl concurrent ops */
RF_uint64 diskwait_us; /* _total_ us spent actually waiting on the disk, incl concurrent ops */
RF_uint64 total_us; /* total us spent on this access */
RF_uint64 num_phys_ios; /* number of physical I/Os invoked */
RF_uint64 phys_io_us; /* time of physical I/O */
RF_Etimer_t tot_timer; /* a timer used to compute total access time */
RF_Etimer_t timer; /* a generic timer val for timing events that live across procedure boundaries */
RF_Etimer_t recon_timer; /* generic timer for recon stuff */
RF_uint64 index; /* sequence number; set from the running trace count in the kernel DFSTRACE build */
} RF_AccTraceEntry_t;
/*
 * Per-array running totals built up by rf_LogTraceRec from individual
 * RF_AccTraceEntry_t records.  Field names mirror the trace-entry fields
 * they are summed from.
 */
typedef struct RF_AccTotals_s {
/* user acc stats */
RF_uint64 suspend_ovhd_us;
RF_uint64 map_us;
RF_uint64 lock_us;
RF_uint64 dag_create_us;
RF_uint64 dag_retry_us;
RF_uint64 exec_us;
RF_uint64 exec_engine_us;
RF_uint64 cleanup_us;
RF_uint64 user_reccount; /* number of user-access records folded in */
/* recon acc stats */
RF_uint64 recon_start_to_fetch_us;
RF_uint64 recon_fetch_to_return_us;
RF_uint64 recon_return_to_submit_us;
RF_uint64 recon_io_overflow_count; /* not updated by rf_LogTraceRec */
RF_uint64 recon_phys_io_us;
RF_uint64 recon_num_phys_ios;
RF_uint64 recon_diskwait_us;
RF_uint64 recon_reccount; /* number of recon-access records folded in */
/* trace entry stats */
RF_uint64 xor_us;
RF_uint64 q_us;
RF_uint64 plog_us;
RF_uint64 diskqueue_us;
RF_uint64 diskwait_us;
RF_uint64 total_us;
RF_uint64 num_log_ents; /* every record logged, user and recon alike */
RF_uint64 phys_io_overflow_count; /* not updated by rf_LogTraceRec */
RF_uint64 num_phys_ios;
RF_uint64 phys_io_us;
RF_uint64 bigvals; /* user records whose diskwait_us exceeded 100000 */
/* histograms */
RF_Hist_t dw_hist[RF_HIST_NUM_BUCKETS]; /* histogram of diskwait_us (user accesses only) */
RF_Hist_t tot_hist[RF_HIST_NUM_BUCKETS]; /* histogram of total_us (user accesses only) */
} RF_AccTotals_t;
#if RF_UTILITY == 0
/* extern declaration of the mutex rf_LogTraceRec holds while touching the trace buffer */
RF_DECLARE_EXTERN_MUTEX(rf_tracing_mutex)
#endif /* RF_UTILITY == 0 */
/* Set up tracing state and register its cleanup on *listp; returns 0 on success, non-zero on failure. */
int rf_ConfigureAccessTrace(RF_ShutdownList_t **listp);
/* Record one trace entry and, if raid->keep_acc_totals is set, add it to raid->acc_totals. */
void rf_LogTraceRec(RF_Raid_t *raid, RF_AccTraceEntry_t *rec);
/* Empty the trace buffer (writing it to the trace file at user level); call with rf_tracing_mutex held. */
void rf_FlushAccessTraceBuf(void);
#endif /* !_RF__RF_ACCTRACE_H_ */

View File

@ -0,0 +1,293 @@
/* $NetBSD: rf_alloclist.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Log: rf_alloclist.c,v
* Revision 1.28 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.27 1996/06/12 03:29:54 jimz
* don't barf just because we can't create an alloclist
*
* Revision 1.26 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.25 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.24 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.23 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.22 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.21 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.20 1996/05/20 16:15:59 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.19 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.18 1996/05/16 22:27:45 jimz
* get rid of surreal_MakeAllocList (what was that, anyway?)
*
* Revision 1.17 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.16 1995/11/30 16:27:07 wvcii
* added copyright info
*
* Revision 1.15 1995/10/05 20:37:56 jimz
* assert non-NULLness of pointer to FREE in FreeAllocList()
*
* Revision 1.14 1995/06/11 20:11:24 holland
* changed fl_hist,miss_count from long to int to get around weird kernel bug
*
* Revision 1.13 1995/05/01 13:28:00 holland
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
*
* Revision 1.12 1995/04/21 19:13:04 holland
* minor change to avoid a syntax error on DO_FREE
*
* Revision 1.11 1995/02/17 19:39:56 holland
* added size param to all calls to Free().
* this is ignored at user level, but necessary in the kernel.
*
* Revision 1.10 1995/02/10 18:08:07 holland
* added DO_FREE macro to fix what I broke during kernelization
*
* Revision 1.9 1995/02/10 17:34:10 holland
* kernelization changes
*
* Revision 1.8 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.7 1995/02/01 15:13:05 holland
* moved #include of general.h out of raid.h and into each file
*
* Revision 1.6 1995/01/11 19:27:02 holland
* many changes related to performance tuning
*
* Revision 1.5 1994/11/29 20:53:10 danner
* Marks mods
*
* Revision 1.3 1994/11/19 21:01:07 danner
* First merge with mark
*
* Revision 1.1.1.1 1994/11/19 20:23:38 danner
* First PQ checkin
*
* Revision 1.2 1994/11/16 15:45:35 danner
* fixed free bug in FreeAllocList
*
*
*/
/****************************************************************************
*
* Alloclist.c -- code to manipulate allocation lists
*
* an allocation list is just a list of AllocListElem structures. Each
* such structure contains a fixed-size array of pointers. Calling
* rf_FreeAllocList() causes each pointer to be freed.
*
***************************************************************************/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_alloclist.h"
#include "rf_debugMem.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
/* Initialised in rf_ConfigureAllocList and destroyed in rf_ShutdownAllocList; the list code in this file never locks it. */
RF_DECLARE_STATIC_MUTEX(alist_mutex)
/* Counters for MakeAllocList requests served from the free list (hit) or by a fresh allocation (miss). */
static unsigned int fl_hit_count, fl_miss_count;
/* Cache of recycled list elements, linked through their next pointers. */
static RF_AllocListElem_t *al_free_list=NULL;
/* Number of elements currently on al_free_list. */
static int al_free_list_count;
/* Threshold above which rf_FreeAllocList frees elements instead of caching them. */
#define RF_AL_FREELIST_MAX 256
/*
 * Release a list element.  At user level this is plain free(), because the
 * callers hold rf_debug_mem_mutex at that point; in the kernel it is RF_Free,
 * which needs the size.
 */
#ifndef KERNEL
#define DO_FREE(_p,_sz) free((_p))
#else /* !KERNEL */
#define DO_FREE(_p,_sz) RF_Free((_p),(_sz))
#endif /* !KERNEL */
static void rf_ShutdownAllocList(void *);

/*
 * Shutdown-list hook registered by rf_ConfigureAllocList: releases every
 * element cached on al_free_list and destroys alist_mutex.
 *
 * ignored: unused shutdown-list argument.
 */
static void rf_ShutdownAllocList(ignored)
	void *ignored;
{
	RF_AllocListElem_t *cursor, *victim;

	cursor = al_free_list;
	while (cursor) {
		/* step past the element before releasing it */
		victim = cursor;
		cursor = cursor->next;
		DO_FREE(victim, sizeof(*victim));
	}
	rf_mutex_destroy(&alist_mutex);
	/*
	 * Free-list statistics reporting, left disabled:
	printf("Alloclist: Free list hit count %lu (%lu %%) miss count %lu (%lu %%)\n",
	fl_hit_count, (100*fl_hit_count)/(fl_hit_count+fl_miss_count),
	fl_miss_count, (100*fl_miss_count)/(fl_hit_count+fl_miss_count));
	*/
}
/*
 * Initialise the allocation-list module: create alist_mutex, empty the
 * free-list cache and its counters, and register rf_ShutdownAllocList on
 * the shutdown list.
 *
 * listp: shutdown list to which the cleanup hook is added.
 *
 * Returns 0 on success, otherwise the code returned by rf_mutex_init or
 * rf_ShutdownCreate (the mutex is destroyed again in the latter case).
 */
int rf_ConfigureAllocList(listp)
	RF_ShutdownList_t **listp;
{
	int status;

	status = rf_mutex_init(&alist_mutex);
	if (status != 0) {
		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
			__LINE__, status);
		return(status);
	}

	/* start with an empty cache and zeroed statistics */
	al_free_list = NULL;
	al_free_list_count = 0;
	fl_miss_count = 0;
	fl_hit_count = 0;

	status = rf_ShutdownCreate(listp, rf_ShutdownAllocList, NULL);
	if (status == 0)
		return(0);

	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
		__FILE__, __LINE__, status);
	rf_mutex_destroy(&alist_mutex);
	return(status);
}
/*
 * Record pointer p (of the given size) on allocation list l so that
 * rf_FreeAllocList will free it later.
 *
 * l:        head of the allocation list; must not be NULL.
 * p:        pointer to track.
 * size:     size recorded alongside p and later passed to RF_Free.
 * lockflag: passed through to rf_real_MakeAllocList when a new element is
 *           needed.
 *
 * we expect the lists to have at most one or two elements, so we're willing
 * to search for the end. If you ever observe the lists growing longer,
 * increase RF_POINTERS_PER_ALLOC_LIST_ELEMENT.
 *
 * If the tail element is full and a new element cannot be allocated, p is
 * left untracked: the assertion reports it in builds where RF_ASSERT is
 * active, and otherwise the function returns without touching the list.
 */
void rf_real_AddToAllocList(l, p, size, lockflag)
	RF_AllocListElem_t *l;
	void *p;
	int size;
	int lockflag;
{
	RF_AllocListElem_t *newelem;

	/* find end of list; every element before the tail must be full */
	for ( ; l->next; l=l->next) {
		RF_ASSERT(l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
	}
	RF_ASSERT(l->numPointers >= 0 && l->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
	if (l->numPointers == RF_POINTERS_PER_ALLOC_LIST_ELEMENT) {
		newelem = rf_real_MakeAllocList(lockflag);
		if (newelem == NULL) {
			/* rf_real_MakeAllocList returns NULL when allocation fails; do not dereference it */
			RF_ASSERT(newelem != NULL);
			return;
		}
		l->next = newelem;
		l = newelem;
	}
	l->pointers[ l->numPointers ] = p;
	l->sizes [ l->numPointers ] = size;
	l->numPointers++;
}
/*
 * Free every pointer tracked on allocation list l, then dispose of the list
 * elements themselves, either by caching them on al_free_list or by freeing
 * them.
 *
 * we use the debug_mem_mutex here because we need to lock it anyway to call free.
 * this is probably a bug somewhere else in the code, but when I call malloc/free
 * outside of any lock I have endless trouble with malloc appearing to return the
 * same pointer twice. Since we have to lock it anyway, we might as well use it
 * as the lock around the al_free_list. Note that we can't call Free with the
 * debug_mem_mutex locked, which is why the tracked pointers are released in
 * a separate pass before the mutex is taken.
 *
 * NOTE(review): the "> RF_AL_FREELIST_MAX" test lets the cache hold
 * RF_AL_FREELIST_MAX + 1 elements before elements start being freed.
 */
void rf_FreeAllocList(l)
	RF_AllocListElem_t *l;
{
	RF_AllocListElem_t *elem, *spent;
	int slot;

	/* pass 1: release the tracked allocations, no lock held */
	elem = l;
	while (elem) {
		RF_ASSERT(elem->numPointers >= 0 && elem->numPointers <= RF_POINTERS_PER_ALLOC_LIST_ELEMENT);
		for (slot = 0; slot < elem->numPointers; slot++) {
			RF_ASSERT(elem->pointers[slot]);
			RF_Free(elem->pointers[slot], elem->sizes[slot]);
		}
		elem = elem->next;
	}

#ifndef KERNEL
	RF_LOCK_MUTEX(rf_debug_mem_mutex);
#endif /* !KERNEL */
	/* pass 2: recycle or free the list elements */
	for (elem = l; elem; ) {
		spent = elem;
		elem = elem->next;
		if (al_free_list_count <= RF_AL_FREELIST_MAX) {
			spent->next = al_free_list;
			al_free_list = spent;
			al_free_list_count++;
		}
		else {
			DO_FREE(spent, sizeof(*spent));
		}
	}
#ifndef KERNEL
	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
#endif /* !KERNEL */
}
/*
 * Obtain a zeroed allocation-list element, taken from the al_free_list cache
 * when one is available and freshly allocated otherwise.
 *
 * lockflag: at user level, non-zero means take rf_debug_mem_mutex around the
 *           free-list access and allocation; it is not used in the kernel
 *           build.
 *
 * Returns the element, or NULL if a fresh allocation failed.
 */
RF_AllocListElem_t *rf_real_MakeAllocList(lockflag)
	int lockflag;
{
	RF_AllocListElem_t *elem;

#ifndef KERNEL
	if (lockflag) { RF_LOCK_MUTEX(rf_debug_mem_mutex); }
#endif /* !KERNEL */
	if (al_free_list == NULL) {
		fl_miss_count++;
#ifndef KERNEL
		/* can't use Malloc at user level b/c we already locked the mutex */
		elem = (RF_AllocListElem_t *) malloc(sizeof(RF_AllocListElem_t));
#else /* !KERNEL */
		/* no allocation locking in kernel, so this is fine */
		RF_Malloc(elem, sizeof(RF_AllocListElem_t), (RF_AllocListElem_t *));
#endif /* !KERNEL */
	}
	else {
		/* reuse the element at the head of the cache */
		fl_hit_count++;
		elem = al_free_list;
		al_free_list = elem->next;
		al_free_list_count--;
	}
#ifndef KERNEL
	if (lockflag) { RF_UNLOCK_MUTEX(rf_debug_mem_mutex); }
#endif /* !KERNEL */
	if (elem != NULL) {
		bzero((char *)elem, sizeof(RF_AllocListElem_t));
	}
	return(elem);
}

View File

@ -0,0 +1,83 @@
/* $NetBSD: rf_alloclist.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/****************************************************************************
*
* alloclist.h -- header file for alloclist.c
*
***************************************************************************/
/* :
* Log: rf_alloclist.h,v
* Revision 1.11 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.10 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.9 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1995/11/30 16:27:13 wvcii
* added copyright info
*
*/
#ifndef _RF__RF_ALLOCLIST_H_
#define _RF__RF_ALLOCLIST_H_
#include "rf_types.h"
/* Capacity of one list element; a further element is chained on when this many pointers are in use. */
#define RF_POINTERS_PER_ALLOC_LIST_ELEMENT 20
/*
 * One node of an allocation list: a fixed-size batch of tracked pointers
 * with their sizes, chained to the next node.
 */
struct RF_AllocListElem_s {
void *pointers[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; /* tracked allocations; slots [0, numPointers) are in use */
int sizes[RF_POINTERS_PER_ALLOC_LIST_ELEMENT]; /* sizes[i] is the size recorded for pointers[i] */
int numPointers; /* number of slots in use */
RF_AllocListElem_t *next; /* next node, or NULL at the tail */
};
/*
 * Assign a new allocation list to _ptr_ (NULL if allocation failed).
 * NOTE: the expansion is a complete statement ending in ';', so using this
 * macro as the body of an unbraced if/else will not parse as intended.
 */
#define rf_MakeAllocList(_ptr_) _ptr_ = rf_real_MakeAllocList(1);
/* Track allocation _ptr_ of size _sz_ on list _l_; always passes lockflag 1. */
#define rf_AddToAllocList(_l_,_ptr_,_sz_) rf_real_AddToAllocList((_l_), (_ptr_), (_sz_), 1)
/* Initialise the module and register its cleanup on *listp; returns 0 on success. */
int rf_ConfigureAllocList(RF_ShutdownList_t **listp);
#if RF_UTILITY == 0
/* Append p (with its size) to list l, chaining a new element when the tail is full. */
void rf_real_AddToAllocList(RF_AllocListElem_t *l, void *p, int size, int lockflag);
/* Free every pointer tracked on l, then recycle or free the list elements. */
void rf_FreeAllocList(RF_AllocListElem_t *l);
/* Return a zeroed list element, or NULL on allocation failure. */
RF_AllocListElem_t *rf_real_MakeAllocList(int lockflag);
#endif /* RF_UTILITY == 0 */
#endif /* !_RF__RF_ALLOCLIST_H_ */

View File

@ -0,0 +1,210 @@
/* $NetBSD: rf_archs.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_archs.h -- defines for which architectures you want to
* include in some particular build of raidframe. Unfortunately,
* it's difficult to exclude declustering, P+Q, and distributed
* sparing because the code is intermixed with RAID5 code. This
* should be fixed.
*
* this is really intended only for use in the kernel, where I
* am worried about the size of the object module. At user level and
* in the simulator, I don't really care that much, so all the
* architectures can be compiled together. Note that by itself, turning
* off these defines does not affect the size of the executable; you
* have to edit the makefile for that.
*
* comment out any line below to eliminate that architecture.
* the list below includes all the modules that can be compiled
* out.
*
* :
* Log: rf_archs.h,v
* Revision 1.32 1996/08/20 23:05:40 jimz
* define RF_KEEP_DISKSTATS to 1
*
* Revision 1.31 1996/07/31 15:34:04 jimz
* include evenodd
*
* Revision 1.30 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.29 1996/07/26 20:11:46 jimz
* only define RF_DEMO for CMU_PDL
*
* Revision 1.28 1996/07/26 20:10:57 jimz
* define RF_CMU_PDL only if it isn't already defined
*
* Revision 1.27 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.26 1996/06/17 14:38:33 jimz
* properly #if out RF_DEMO code
* fix bug in MakeConfig that was causing weird behavior
* in configuration routines (config was not zeroed at start)
* clean up genplot handling of stacks
*
* Revision 1.25 1996/06/14 21:24:59 jimz
* turn on RF_CMU_PDL by default
*
* Revision 1.24 1996/06/13 20:41:57 jimz
* add RF_INCLUDE_QUEUE_RANDOM (0)
*
* Revision 1.23 1996/06/11 18:12:36 jimz
* get rid of JOIN operations
* use ThreadGroup stuff instead
* fix some allocation/deallocation and sync bugs
*
* Revision 1.22 1996/06/10 22:24:55 wvcii
* added symbols for enabling forward or backward error
* recovery experiments
*
* Revision 1.21 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.20 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.19 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.18 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.17 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.16 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.15 1996/05/15 22:32:59 jimz
* remove cache and vs stuff
*
* Revision 1.14 1995/11/30 16:27:34 wvcii
* added copyright info
*
* Revision 1.13 1995/11/28 21:23:44 amiri
* added the interleaved declustering architecture
* ('I'), with distributed sparing.
*
* Revision 1.12 1995/11/17 16:59:45 amiri
* don't INCLUDE_CHAINDECLUSTER in the kernel
* source.
*
* Revision 1.11 1995/11/16 16:15:21 amiri
* don't include RAID5 with rotated sparing (INCLUDE_RAID5_RS) in kernel
*
* Revision 1.10 1995/10/12 17:40:47 jimz
* define INCLUDE_LS
*
* Revision 1.9 1995/10/11 06:56:47 jimz
* define INCLUDE_VS (sanity check for compilation)
*
* Revision 1.8 1995/10/05 18:56:24 jimz
* don't INCLUDE_VS
*
* Revision 1.7 1995/10/04 03:51:20 wvcii
* added raid 1
*
* Revision 1.6 1995/09/07 09:59:29 wvcii
* unstable archs conditionally defined for !KERNEL makes
*
*
*/
#ifndef _RF__RF_ARCHS_H_
#define _RF__RF_ARCHS_H_
/*
* Turn off if you do not have CMU PDL support compiled
* into your kernel.
*
* Defaults to 0 (off) unless the build has already defined RF_CMU_PDL.
*/
#ifndef RF_CMU_PDL
#define RF_CMU_PDL 0
#endif /* !RF_CMU_PDL */
/*
* Khalil's performance-displaying demo stuff.
* Relies on CMU meter tools.
*
* RF_DEMO is only defined when KERNEL is not defined and RF_CMU_PDL > 0.
* NOTE(review): other RAIDframe sources in this import test _KERNEL rather
* than KERNEL -- confirm which symbol is intended here.
*/
#ifndef KERNEL
#if RF_CMU_PDL > 0
#define RF_DEMO 1
#endif /* RF_CMU_PDL > 0 */
#endif /* !KERNEL */
/*
* Compile-time configuration switches; each one is defined to 1 or 0.
* NOTE(review): the RF_INCLUDE_* names suggest they select which RAID
* architectures are compiled in (presumably tested with #if elsewhere) --
* confirm against the users of each macro before changing a value.
*/
#define RF_INCLUDE_EVENODD 1
#define RF_INCLUDE_RAID5_RS 1
#define RF_INCLUDE_PARITYLOGGING 1
#define RF_INCLUDE_CHAINDECLUSTER 1
#define RF_INCLUDE_INTERDECLUSTER 1
#define RF_INCLUDE_RAID0 1
#define RF_INCLUDE_RAID1 1
#define RF_INCLUDE_RAID4 1
#define RF_INCLUDE_RAID5 1
#define RF_INCLUDE_RAID6 0
#define RF_INCLUDE_DECL_PQ 0
#define RF_MEMORY_REDZONES 0
#define RF_RECON_STATS 1
#define RF_INCLUDE_QUEUE_RANDOM 0
#define RF_KEEP_DISKSTATS 1
/* These two symbols enable nonstandard forms of error recovery.
* These modes are only valid for performance measurements and
* data corruption will occur if an error occurs when either
* forward or backward error recovery are enabled. In general
* both of the following two definitions should be commented
* out--this forces RAIDframe to use roll-away error recovery
* which does guarantee proper error recovery without data corruption
*/
/* #define RF_FORWARD 1 */
/* #define RF_BACKWARD 1 */
/* pulled in last, after every switch above has been defined */
#include "rf_options.h"
#endif /* !_RF__RF_ARCHS_H_ */

View File

@ -0,0 +1,617 @@
/* $NetBSD: rf_aselect.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************
*
* aselect.c -- algorithm selection code
*
*****************************************************************************/
/*
* :
* Log: rf_aselect.c,v
* Revision 1.35 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.34 1996/07/27 18:39:39 jimz
* cleanup sweep
*
* Revision 1.33 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.32 1996/06/12 03:29:40 jimz
* Note: things that call InitHdrNode should check
* for successful return.
*
* Revision 1.31 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.30 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.29 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.28 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.27 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.26 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.25 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.24 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.23 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.22 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.21 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.20 1996/05/03 19:45:35 wvcii
* removed includes of old deg creation files
* updated SelectAlgorithm comments
*
* Revision 1.19 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.18 1995/11/30 16:27:48 wvcii
* added copyright info
*
* Revision 1.17 1995/11/19 16:25:55 wvcii
* SelectAlgorithm now creates an array, returned in desc->dagArray
* return value is now int (1 = FAIL)
*
* Revision 1.16 1995/11/17 15:09:58 wvcii
* fixed bug in SelectAlgorithm in which multiple graphs per stripe are required
*
* Revision 1.15 1995/11/07 17:12:42 wvcii
* changed SelectAlgorithm as follows:
*
* dag creation funcs now create term nodes
* dag selection funcs no longer return numHdrSucc, numTermAnt
* there is now one dag hdr for each dag in a request, implying
* that SelectAlgorithm now returns a linked list of dag hdrs
*
*/
#include "rf_archs.h"
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#include "rf_desc.h"
#include "rf_map.h"
#if defined(__NetBSD__) && defined(_KERNEL)
/* the function below is not used... so don't define it! */
#else
static void TransferDagMemory(RF_DagHeader_t *, RF_DagHeader_t *);
#endif
static int InitHdrNode(RF_DagHeader_t **, RF_Raid_t *, int);
static void UpdateNodeHdrPtr(RF_DagHeader_t *, RF_DagNode_t *);
int rf_SelectAlgorithm(RF_RaidAccessDesc_t *, RF_RaidAccessFlags_t );
/******************************************************************************
 *
 * Create and initialize a dag header.
 *
 * hdr            - out parameter; receives the new header on success and
 *                  NULL on failure
 * raidPtr        - array the dag belongs to; stored in the header
 * memChunkEnable - accepted for interface compatibility; not used here
 *
 * On success the header has a freshly made allocation list, status
 * rf_enable, zero succedents and no successor dag, and 0 is returned.
 *
 * Returns ENOMEM if either the header or its allocation list cannot be
 * obtained.  In that case nothing is left allocated and *hdr is NULL, so a
 * caller can never be handed a pointer to an already-freed header.
 *
 *****************************************************************************/
static int InitHdrNode(RF_DagHeader_t **hdr, RF_Raid_t *raidPtr, int memChunkEnable)
{
	RF_DagHeader_t *newHdr;

	(void) memChunkEnable;

	/* never publish a header until it is fully set up */
	*hdr = NULL;

	newHdr = rf_AllocDAGHeader();
	if (newHdr == NULL)
		return(ENOMEM);

	/* rf_MakeAllocList stores its result into the argument it is given */
	rf_MakeAllocList(newHdr->allocList);
	if (newHdr->allocList == NULL) {
		rf_FreeDAGHeader(newHdr);
		return(ENOMEM);
	}

	newHdr->status = rf_enable;
	newHdr->numSuccedents = 0;
	newHdr->raidPtr = raidPtr;
	newHdr->next = NULL;

	*hdr = newHdr;
	return(0);
}
/******************************************************************************
*
* Transfer allocation list and mem chunks from one dag to another
*
*****************************************************************************/
#if defined(__NetBSD__) && defined(_KERNEL)
/* the function below is not used... so don't define it! */
#else
/*
 * Move everything dagb owns over to daga: the entries of its allocation
 * list, its memory chunks (regular and extra), and its access-stripe-map
 * list.  Afterwards dagb's allocation list entries are empty, its chunk
 * indices are zero and its asmList is NULL.
 *
 * daga - destination dag header
 * dagb - source dag header
 */
static void TransferDagMemory(RF_DagHeader_t *daga, RF_DagHeader_t *dagb)
{
	RF_AccessStripeMapHeader_t *end;
	RF_AllocListElem_t *p;
	int i, memChunksXfrd = 0, xtraChunksXfrd = 0;
	int chunksToXfr;

	/* transfer allocList from dagb to daga */
	for (p = dagb->allocList; p; p = p->next) {
		for (i = 0; i < p->numPointers; i++) {
			rf_AddToAllocList(daga->allocList, p->pointers[i], p->sizes[i]);
			p->pointers[i] = NULL;
			p->sizes[i] = 0;
		}
		p->numPointers = 0;
	}

	/* dagb's indices are not modified until both loops below are done */
	chunksToXfr = dagb->chunkIndex + dagb->xtraChunkIndex;

	/* transfer chunks from dagb to daga */
	while ((memChunksXfrd + xtraChunksXfrd < chunksToXfr) && (daga->chunkIndex < RF_MAXCHUNKS)) {
		/*
		 * stuff chunks into daga's memChunk array.  Both branches must
		 * advance daga->chunkIndex: it is the index of the next free
		 * memChunk slot and the quantity the loop condition bounds.
		 */
		if (memChunksXfrd < dagb->chunkIndex) {
			daga->memChunk[daga->chunkIndex++] = dagb->memChunk[memChunksXfrd];
			dagb->memChunk[memChunksXfrd++] = NULL;
		} else {
			daga->memChunk[daga->chunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
			dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
		}
	}

	/* use escape hatch to hold excess chunks */
	while (memChunksXfrd + xtraChunksXfrd < chunksToXfr) {
		if (memChunksXfrd < dagb->chunkIndex) {
			daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->memChunk[memChunksXfrd];
			dagb->memChunk[memChunksXfrd++] = NULL;
		} else {
			daga->xtraMemChunk[daga->xtraChunkIndex++] = dagb->xtraMemChunk[xtraChunksXfrd];
			dagb->xtraMemChunk[xtraChunksXfrd++] = NULL;
		}
	}

	RF_ASSERT((memChunksXfrd == dagb->chunkIndex) && (xtraChunksXfrd == dagb->xtraChunkIndex));
	RF_ASSERT(daga->chunkIndex <= RF_MAXCHUNKS);
	RF_ASSERT(daga->xtraChunkIndex <= daga->xtraChunkCnt);
	dagb->chunkIndex = 0;
	dagb->xtraChunkIndex = 0;

	/* transfer asmList from dagb to daga: append to the tail of daga's list */
	if (dagb->asmList) {
		if (daga->asmList) {
			end = daga->asmList;
			while (end->next)
				end = end->next;
			end->next = dagb->asmList;
		} else {
			daga->asmList = dagb->asmList;
		}
		dagb->asmList = NULL;
	}
}
#endif /* __NetBSD__ */
/*****************************************************************************************
 *
 * Make the dagHdr field of a node, and of the nodes reachable through its
 * succedents, point at hdr.
 *
 * IMPORTANT: the walk is pruned.  A succedent whose dagHdr already equals hdr
 * is not visited, so nothing below it is examined or updated.
 *
 * hdr  - header every visited node should reference (must not be NULL)
 * node - node to start from (must not be NULL)
 *
 ****************************************************************************************/
static void UpdateNodeHdrPtr(RF_DagHeader_t *hdr, RF_DagNode_t *node)
{
	int succ;
	RF_DagNode_t *child;

	RF_ASSERT(hdr != NULL && node != NULL);

	succ = 0;
	while (succ < node->numSuccedents) {
		child = node->succedents[succ];
		if (child->dagHdr != hdr)
			UpdateNodeHdrPtr(hdr, child);
		succ++;
	}

	/* the node itself is updated only after its succedents were walked */
	node->dagHdr = hdr;
}
/******************************************************************************
*
* Create a DAG to do a read or write operation.
*
* create an array of dagLists, one list per parity stripe.
* return the lists in the array desc->dagArray.
*
* Normally, each list contains one dag for the entire stripe. In some
* tricky cases, we break this into multiple dags, either one per stripe
* unit or one per block (sector). When this occurs, these dags are returned
* as a linked list (dagList) which is executed sequentially (to preserve
* atomic parity updates in the stripe).
*
* dags which operate on independent parity groups (stripes) are returned in
* independent dagLists (distinct elements in desc->dagArray) and may be
* executed concurrently.
*
* Finally, if the SelectionFunc fails to create a dag for a block, we punt
* and return 1.
*
* The above process is performed in two phases:
* 1) create an array(s) of creation functions (eg stripeFuncs)
* 2) create dags and concatenate/merge to form the final dag.
*
* Because dags are basic blocks (single entry, single exit, unconditional
* control flow), we can add the following optimizations (future work):
* first-pass optimizer to allow max concurrency (need all data dependencies)
* second-pass optimizer to eliminate common subexpressions (need true
* data dependencies)
* third-pass optimizer to eliminate dead code (need true data dependencies)
*****************************************************************************/
#define MAXNSTRIPES 50
/*
 * Pick dag creation functions for every stripe of the access described by
 * desc, then build the dags into desc->dagArray (one dag list per stripe).
 *
 * desc  - access descriptor; desc->numStripes and desc->dagArray are filled in
 * flags - passed unchanged to every dag creation function
 *
 * Selection is tried per stripe; a stripe with no creation function is
 * remapped and retried per stripe unit, and a stripe unit with none is
 * remapped and retried per block (sector).
 *
 * Returns 0 on success.  Returns 1 if some block has no creation function;
 * in that case every piece of bail-out state built here is released and no
 * dags are created.
 *
 * NOTE(review): the results of the RF_Malloc/RF_Calloc calls and of
 * InitHdrNode() are not checked in this function.
 */
int rf_SelectAlgorithm(RF_RaidAccessDesc_t *desc, RF_RaidAccessFlags_t flags)
{
	RF_AccessStripeMapHeader_t *asm_h = desc->asmap;
	RF_IoType_t type = desc->type;
	RF_Raid_t *raidPtr = desc->raidPtr;
	void *bp = desc->bp;
	RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
	RF_AccessStripeMap_t *asm_p;
	RF_DagHeader_t *dag_h = NULL, *tempdag_h, *lastdag_h;
	int i, j, k;
	RF_VoidFuncPtr *stripeFuncs, normalStripeFuncs[MAXNSTRIPES];
	RF_AccessStripeMap_t *asm_up, *asm_bp;
	RF_AccessStripeMapHeader_t ***asmh_u, *endASMList;
	RF_AccessStripeMapHeader_t ***asmh_b;
	RF_VoidFuncPtr **stripeUnitFuncs, uFunc;
	RF_VoidFuncPtr **blockFuncs, bFunc;
	int numStripesBailed = 0, cantCreateDAGs = RF_FALSE;
	int numStripeUnitsBailed = 0;
	int stripeNum, stripeUnitNum;
	RF_StripeNum_t numStripeUnits;
	RF_SectorNum_t numBlocks;
	RF_RaidAddr_t address;
	int length;
	RF_PhysDiskAddr_t *physPtr;
	caddr_t buffer;

	lastdag_h = NULL;
	asmh_u = asmh_b = NULL;
	stripeUnitFuncs = NULL;
	blockFuncs = NULL;

	/* get an array of dag-function creation pointers, try to avoid calling malloc */
	if (asm_h->numStripes <= MAXNSTRIPES) {
		stripeFuncs = normalStripeFuncs;
	} else {
		RF_Calloc(stripeFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
	}

	/*
	 * Phase 1: walk through the asm list once collecting information.
	 * Attempt to find a single creation function for each stripe.
	 */
	desc->numStripes = 0;
	for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
		desc->numStripes++;
		(raidPtr->Layout.map->SelectionFunc)(raidPtr, type, asm_p, &stripeFuncs[i]);
		/* check to see if we found a creation func for this stripe */
		if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) {
			/* could not find creation function for entire stripe
			   so, let's see if we can find one for each stripe unit in the stripe */
			if (numStripesBailed == 0) {
				/* one stripe map header for each stripe we bail on */
				RF_Malloc(asmh_u, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes, (RF_AccessStripeMapHeader_t ***));
				/* create an array of ptrs to arrays of stripeFuncs */
				RF_Calloc(stripeUnitFuncs, asm_h->numStripes, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
			}
			/* create an array of creation funcs (called stripeFuncs) for this stripe */
			numStripeUnits = asm_p->numStripeUnitsAccessed;
			RF_Calloc(stripeUnitFuncs[numStripesBailed], numStripeUnits, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
			RF_Malloc(asmh_u[numStripesBailed], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
			/* lookup array of stripeUnitFuncs for this stripe */
			for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
				/* remap for series of single stripe-unit accesses */
				address = physPtr->raidAddress;
				length = physPtr->numSector;
				buffer = physPtr->bufPtr;
				asmh_u[numStripesBailed][j] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
				asm_up = asmh_u[numStripesBailed][j]->stripeMap;
				/* get the creation func for this stripe unit */
				(raidPtr->Layout.map->SelectionFunc)(raidPtr, type, asm_up, &(stripeUnitFuncs[numStripesBailed][j]));
				/* check to see if we found a creation func for this stripe unit */
				if (stripeUnitFuncs[numStripesBailed][j] == (RF_VoidFuncPtr) NULL) {
					/* could not find creation function for stripe unit so,
					   let's see if we can find one for each block in the stripe unit */
					if (numStripeUnitsBailed == 0) {
						/* one stripe map header for each stripe unit we bail on */
						RF_Malloc(asmh_b, sizeof(RF_AccessStripeMapHeader_t **) * asm_h->numStripes * raidPtr->Layout.numDataCol, (RF_AccessStripeMapHeader_t ***));
						/* create an array of ptrs to arrays of blockFuncs */
						RF_Calloc(blockFuncs, asm_h->numStripes * raidPtr->Layout.numDataCol, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr **));
					}
					/* create an array of creation funcs (called blockFuncs) for this stripe unit */
					numBlocks = physPtr->numSector;
					RF_Calloc(blockFuncs[numStripeUnitsBailed], numBlocks, sizeof(RF_VoidFuncPtr), (RF_VoidFuncPtr *));
					RF_Malloc(asmh_b[numStripeUnitsBailed], numBlocks * sizeof(RF_AccessStripeMapHeader_t *), (RF_AccessStripeMapHeader_t **));
					/* lookup array of blockFuncs for this stripe unit */
					for (k = 0; k < numBlocks; k++) {
						/* remap for series of single-block accesses */
						address = physPtr->raidAddress + k;
						length = 1;
						buffer = physPtr->bufPtr + (k * (1<<raidPtr->logBytesPerSector));
						asmh_b[numStripeUnitsBailed][k] = rf_MapAccess(raidPtr, address, length, buffer, RF_DONT_REMAP);
						asm_bp = asmh_b[numStripeUnitsBailed][k]->stripeMap;
						/* get the creation func for this block */
						(raidPtr->Layout.map->SelectionFunc)(raidPtr, type, asm_bp, &(blockFuncs[numStripeUnitsBailed][k]));
						/*
						 * no creation func even for a single block: remember
						 * the failure but keep going, so that every bail-out
						 * array is fully populated before it is torn down
						 */
						if (blockFuncs[numStripeUnitsBailed][k] == NULL)
							cantCreateDAGs = RF_TRUE;
					}
					numStripeUnitsBailed++;
				}
			}
			RF_ASSERT(j == numStripeUnits);
			numStripesBailed++;
		}
	}

	if (cantCreateDAGs) {
		/*
		 * Free memory and punt.  stripeFuncs is consulted by the walk
		 * below, so it must not be released until the walk is finished.
		 */
		if (numStripesBailed > 0) {
			stripeNum = 0;
			stripeUnitNum = 0;
			for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
				if (stripeFuncs[i] != NULL)
					continue;
				numStripeUnits = asm_p->numStripeUnitsAccessed;
				for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
					if (stripeUnitFuncs[stripeNum][j] == NULL) {
						/* release the per-block state built for this stripe unit */
						numBlocks = physPtr->numSector;
						for (k = 0; k < numBlocks; k++)
							rf_FreeAccessStripeMap(asmh_b[stripeUnitNum][k]);
						RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *));
						RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr));
						stripeUnitNum++;
					}
					rf_FreeAccessStripeMap(asmh_u[stripeNum][j]);
				}
				RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
				RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
				stripeNum++;
			}
			RF_ASSERT(stripeNum == numStripesBailed);
			RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
			RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
			if (numStripeUnitsBailed > 0) {
				RF_ASSERT(stripeUnitNum == numStripeUnitsBailed);
				RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr));
				RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
			}
		}
		if (asm_h->numStripes > MAXNSTRIPES) {
			RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
		}
		return(1);
	}

	/*
	 * Phase 2: begin dag creation.  stripeNum indexes the bailed stripes
	 * and stripeUnitNum the bailed stripe units, in the order phase 1
	 * recorded them.
	 */
	stripeNum = 0;
	stripeUnitNum = 0;
	/* create an array of dagLists and fill them in */
	RF_CallocAndAdd(desc->dagArray, desc->numStripes, sizeof(RF_DagList_t), (RF_DagList_t *), desc->cleanupList);
	for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
		/* grab dag header for this stripe */
		dag_h = NULL;
		desc->dagArray[i].desc = desc;
		if (stripeFuncs[i] == (RF_VoidFuncPtr) NULL) {
			/* use bailout functions for this stripe */
			for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
				uFunc = stripeUnitFuncs[stripeNum][j];
				if (uFunc == (RF_VoidFuncPtr) NULL) {
					/* use bailout functions for this stripe unit */
					for (k = 0; k < physPtr->numSector; k++) {
						/* create a dag for this block and append it to the stripe's list */
						InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
						desc->dagArray[i].numDags++;
						if (dag_h == NULL) {
							dag_h = tempdag_h;
						} else {
							lastdag_h->next = tempdag_h;
						}
						lastdag_h = tempdag_h;
						bFunc = blockFuncs[stripeUnitNum][k];
						RF_ASSERT(bFunc);
						asm_bp = asmh_b[stripeUnitNum][k]->stripeMap;
						(*bFunc)(raidPtr, asm_bp, tempdag_h, bp, flags, tempdag_h->allocList);
					}
					stripeUnitNum++;
				} else {
					/* create a dag for this unit and append it to the stripe's list */
					InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
					desc->dagArray[i].numDags++;
					if (dag_h == NULL) {
						dag_h = tempdag_h;
					} else {
						lastdag_h->next = tempdag_h;
					}
					lastdag_h = tempdag_h;
					asm_up = asmh_u[stripeNum][j]->stripeMap;
					(*uFunc)(raidPtr, asm_up, tempdag_h, bp, flags, tempdag_h->allocList);
				}
			}
			RF_ASSERT(j == asm_p->numStripeUnitsAccessed);
			/* merge linked bailout dag to existing dag collection */
			stripeNum++;
		} else {
			/* Create a dag for this parity stripe */
			InitHdrNode(&tempdag_h, raidPtr, rf_useMemChunks);
			desc->dagArray[i].numDags++;
			if (dag_h == NULL) {
				dag_h = tempdag_h;
			} else {
				lastdag_h->next = tempdag_h;
			}
			lastdag_h = tempdag_h;
			(stripeFuncs[i])(raidPtr, asm_p, tempdag_h, bp, flags, tempdag_h->allocList);
		}
		desc->dagArray[i].dags = dag_h;
	}
	RF_ASSERT(i == desc->numStripes);

	/*
	 * Free the bail-out bookkeeping.  The remapped access-stripe-map
	 * headers are not freed here: they are chained onto dag_h->asmList
	 * (dag_h is the first dag of the last stripe processed above).
	 */
	if ((numStripesBailed > 0) || (numStripeUnitsBailed > 0)) {
		stripeNum = 0;
		stripeUnitNum = 0;
		/* find the current tail of the asm list, if there is one */
		if (dag_h->asmList) {
			endASMList = dag_h->asmList;
			while (endASMList->next)
				endASMList = endASMList->next;
		} else {
			endASMList = NULL;
		}
		/* walk through io, stripe by stripe */
		for (i = 0, asm_p = asmap; asm_p; asm_p = asm_p->next, i++) {
			if (stripeFuncs[i] != NULL)
				continue;
			numStripeUnits = asm_p->numStripeUnitsAccessed;
			/* walk through stripe, stripe unit by stripe unit */
			for (j = 0, physPtr = asm_p->physInfo; physPtr; physPtr = physPtr->next, j++) {
				if (stripeUnitFuncs[stripeNum][j] == NULL) {
					numBlocks = physPtr->numSector;
					/* walk through stripe unit, block by block */
					for (k = 0; k < numBlocks; k++) {
						if (dag_h->asmList == NULL) {
							dag_h->asmList = asmh_b[stripeUnitNum][k];
							endASMList = dag_h->asmList;
						} else {
							endASMList->next = asmh_b[stripeUnitNum][k];
							endASMList = endASMList->next;
						}
					}
					RF_Free(asmh_b[stripeUnitNum], numBlocks * sizeof(RF_AccessStripeMapHeader_t *));
					RF_Free(blockFuncs[stripeUnitNum], numBlocks * sizeof(RF_VoidFuncPtr));
					stripeUnitNum++;
				}
				/* the unit-level header is chained on in either case */
				if (dag_h->asmList == NULL) {
					dag_h->asmList = asmh_u[stripeNum][j];
					endASMList = dag_h->asmList;
				} else {
					endASMList->next = asmh_u[stripeNum][j];
					endASMList = endASMList->next;
				}
			}
			RF_Free(asmh_u[stripeNum], numStripeUnits * sizeof(RF_AccessStripeMapHeader_t *));
			RF_Free(stripeUnitFuncs[stripeNum], numStripeUnits * sizeof(RF_VoidFuncPtr));
			stripeNum++;
		}
		RF_ASSERT(stripeNum == numStripesBailed);
		RF_Free(stripeUnitFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
		RF_Free(asmh_u, asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
		if (numStripeUnitsBailed > 0) {
			RF_ASSERT(stripeUnitNum == numStripeUnitsBailed);
			RF_Free(blockFuncs, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_VoidFuncPtr));
			RF_Free(asmh_b, raidPtr->Layout.numDataCol * asm_h->numStripes * sizeof(RF_AccessStripeMapHeader_t **));
		}
	}

	/* released last: the walk above still reads stripeFuncs[i] */
	if (asm_h->numStripes > MAXNSTRIPES) {
		RF_Free(stripeFuncs, asm_h->numStripes * sizeof(RF_VoidFuncPtr));
	}
	return(0);
}

View File

@ -0,0 +1,59 @@
/* $NetBSD: rf_aselect.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************
*
* aselect.h -- header file for algorithm selection code
*
*****************************************************************************/
/* :
* Log: rf_aselect.h,v
* Revision 1.5 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.4 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.3 1995/11/30 16:28:00 wvcii
* added copyright info
*
* Revision 1.2 1995/11/19 16:20:46 wvcii
* changed SelectAlgorithm prototype
*
*/
#ifndef _RF__RF_ASELECT_H_
#define _RF__RF_ASELECT_H_
#include "rf_desc.h"
/*
 * Select dag creation functions for the access described by desc and build
 * the resulting dags into desc->dagArray, one dag list per stripe of the
 * access.  flags is handed unchanged to each dag creation function.
 *
 * Returns 0 on success, or 1 if no creation function could be found for
 * some part of the access.
 */
int rf_SelectAlgorithm(RF_RaidAccessDesc_t *desc, RF_RaidAccessFlags_t flags);
#endif /* !_RF__RF_ASELECT_H_ */

View File

@ -0,0 +1,120 @@
/* $NetBSD: rf_callback.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* callback.c -- code to manipulate callback descriptor
*
****************************************************************************************/
/* :
* Log: rf_callback.c,v
* Revision 1.11 1996/06/17 03:18:04 jimz
* include shutdown.h for macroized ShutdownCreate
*
* Revision 1.10 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.9 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1996/05/17 16:30:41 jimz
* convert to RF_FREELIST stuff
*
* Revision 1.6 1995/12/01 15:16:04 root
* added copyright info
*
*/
#ifndef _KERNEL
#ifdef __NetBSD__
#include <unistd.h>
#endif /* __NetBSD__ */
#endif
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_callback.h"
#include "rf_debugMem.h"
#include "rf_freelist.h"
#include "rf_shutdown.h"
static RF_FreeList_t *rf_callback_freelist;
#define RF_MAX_FREE_CALLBACK 64
#define RF_CALLBACK_INC 4
#define RF_CALLBACK_INITIAL 4
static void rf_ShutdownCallback(void *);
/*
 * Shutdown-list hook registered by rf_ConfigureCallback(): destroys the
 * callback descriptor freelist.  The argument is not used.
 */
static void rf_ShutdownCallback(void *ignored)
{
	RF_FREELIST_DESTROY(rf_callback_freelist, next, (RF_CallbackDesc_t *));
}
/*
 * Set up the callback descriptor freelist.
 *
 * Creates the freelist, registers rf_ShutdownCallback() on the shutdown list
 * *listp, and then primes the freelist with RF_CALLBACK_INITIAL entries.
 *
 * Returns 0 on success, ENOMEM if the freelist could not be created, or the
 * error from rf_ShutdownCreate() (after destroying the freelist again).
 */
int rf_ConfigureCallback(RF_ShutdownList_t **listp)
{
	int rc;

	RF_FREELIST_CREATE(rf_callback_freelist, RF_MAX_FREE_CALLBACK,
	    RF_CALLBACK_INC, sizeof(RF_CallbackDesc_t));
	if (rf_callback_freelist == NULL)
		return(ENOMEM);

	rc = rf_ShutdownCreate(listp, rf_ShutdownCallback, NULL);
	if (rc == 0) {
		/* registered for teardown; now pre-populate the freelist */
		RF_FREELIST_PRIME(rf_callback_freelist, RF_CALLBACK_INITIAL, next,
		    (RF_CallbackDesc_t *));
		return(0);
	}

	/* could not register the teardown hook: undo the creation ourselves */
	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
	    __LINE__, rc);
	rf_ShutdownCallback(NULL);
	return(rc);
}
/*
 * Take a callback descriptor from the callback freelist and return it.
 * The value returned is whatever RF_FREELIST_GET stored for the request.
 */
RF_CallbackDesc_t *rf_AllocCallbackDesc(void)
{
	RF_CallbackDesc_t *desc;

	RF_FREELIST_GET(rf_callback_freelist, desc, next, (RF_CallbackDesc_t *));
	return(desc);
}
/*
 * Hand the callback descriptor p back to the callback freelist.
 */
void rf_FreeCallbackDesc(RF_CallbackDesc_t *p)
{
	RF_FREELIST_FREE(rf_callback_freelist, p, next);
}

View File

@ -0,0 +1,91 @@
/* $NetBSD: rf_callback.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* callback.h -- header file for callback.c
*
* the reconstruction code must manage concurrent I/Os on multiple drives.
* it sometimes needs to suspend operation on a particular drive until some
* condition occurs. we can't block the thread, of course, or we wouldn't
* be able to manage our other outstanding I/Os. Instead we just suspend
* new activity on the indicated disk, and create a callback descriptor and
* put it someplace where it will get invoked when the condition that's
* stalling us has cleared. When the descriptor is invoked, it will call
* a function that will restart operation on the indicated disk.
*
****************************************************************************************/
/* :
* Log: rf_callback.h,v
* Revision 1.8 1996/08/01 15:57:28 jimz
* minor cleanup
*
* Revision 1.7 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.6 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.5 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.4 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.3 1996/05/17 16:30:46 jimz
* add prototypes
*
* Revision 1.2 1995/12/01 15:15:55 root
* added copyright info
*
*/
#ifndef _RF__RF_CALLBACK_H_
#define _RF__RF_CALLBACK_H_
#include "rf_types.h"
/*
 * Callback descriptor: records a function to invoke later, together with
 * its arguments and the disk (row, col) it concerns.  Descriptors are
 * obtained with rf_AllocCallbackDesc() and returned with
 * rf_FreeCallbackDesc().
 */
struct RF_CallbackDesc_s {
void (*callbackFunc)(RF_CBParam_t); /* function to call */
RF_CBParam_t callbackArg; /* args to give to function, or just info about this callback */
RF_CBParam_t callbackArg2; /* second argument slot; NOTE(review): usage is not visible here */
RF_RowCol_t row; /* disk row and column IDs to give to the callback func */
RF_RowCol_t col;
RF_CallbackDesc_t *next; /* next entry in list; also the link field used by the freelist macros in rf_callback.c */
};
/*
 * Create the callback descriptor freelist and register its teardown on the
 * shutdown list *listp.  Returns 0 on success, nonzero (ENOMEM or the
 * shutdown-list error) on failure.
 */
int rf_ConfigureCallback(RF_ShutdownList_t **listp);
/* Get a callback descriptor from the freelist. */
RF_CallbackDesc_t *rf_AllocCallbackDesc(void);
/* Return callback descriptor p to the freelist. */
void rf_FreeCallbackDesc(RF_CallbackDesc_t *p);
#endif /* !_RF__RF_CALLBACK_H_ */

114
sys/dev/raidframe/rf_ccmn.h Normal file
View File

@ -0,0 +1,114 @@
/* $NetBSD: rf_ccmn.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_ccmn.h
* header file that declares the ccmn routines, and includes
* the files needed to use them.
*/
/* :
* Log: rf_ccmn.h,v
* Revision 1.4 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.3 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.2 1995/12/01 15:16:45 root
* added copyright info
*
*/
#ifndef _RF__RF_CCMN_H_
#define _RF__RF_CCMN_H_
#ifdef __osf__
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/ioctl.h>
#include <io/common/iotypes.h>
#include <io/cam/cam_debug.h>
#include <io/cam/cam.h>
#include <io/cam/dec_cam.h>
#include <io/cam/uagt.h>
#include <io/cam/scsi_all.h>
#include <io/cam/scsi_direct.h>
#ifdef KERNEL
#include <sys/conf.h>
#include <sys/mtio.h>
#include <io/common/devio.h>
#include <io/common/devdriver.h>
#include <io/cam/scsi_status.h>
#include <io/cam/pdrv.h>
#include <io/common/pt.h>
#include <sys/disklabel.h>
#include <io/cam/cam_disk.h>
#include <io/cam/ccfg.h>
/*
 * Declarations of the ccmn_* routines.  These are old-style declarations:
 * the empty parentheses carry no argument information, so calls through
 * them are not argument-checked by the compiler.  Everything in this group
 * is visible only when both __osf__ and KERNEL are defined.
 */
extern void ccmn_init();
extern long ccmn_open_unit();
extern void ccmn_close_unit();
extern u_long ccmn_send_ccb();
extern void ccmn_rem_ccb();
extern void ccmn_abort_que();
extern void ccmn_term_que();
/* routines returning pointers to the various CCB_* structures */
extern CCB_HEADER *ccmn_get_ccb();
extern void ccmn_rel_ccb();
extern CCB_SCSIIO *ccmn_io_ccb_bld();
extern CCB_GETDEV *ccmn_gdev_ccb_bld();
extern CCB_SETDEV *ccmn_sdev_ccb_bld();
extern CCB_SETASYNC *ccmn_sasy_ccb_bld();
extern CCB_RELSIM *ccmn_rsq_ccb_bld();
extern CCB_PATHINQ *ccmn_pinq_ccb_bld();
extern CCB_ABORT *ccmn_abort_ccb_bld();
extern CCB_TERMIO *ccmn_term_ccb_bld();
extern CCB_RESETDEV *ccmn_bdr_ccb_bld();
extern CCB_RESETBUS *ccmn_br_ccb_bld();
extern CCB_SCSIIO *ccmn_tur();
extern CCB_SCSIIO *ccmn_mode_select();
extern u_long ccmn_ccb_status();
/* struct buf and data-buffer get/release pairs */
extern struct buf *ccmn_get_bp();
extern void ccmn_rel_bp();
extern u_char *ccmn_get_dbuf();
extern void ccmn_rel_dbuf();
/* tables defined outside this header */
extern struct device *camdinfo[];
extern struct controller *camminfo[];
extern PDRV_UNIT_ELEM pdrv_unit_table[];
#endif /* KERNEL */
#endif /* __osf__ */
#endif /* !_RF__RF_CCMN_H_ */

View File

@ -0,0 +1,381 @@
/* $NetBSD: rf_chaindecluster.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/******************************************************************************
*
* rf_chaindecluster.c -- implements chained declustering
*
*****************************************************************************/
/* :
* Log: rf_chaindecluster.c,v
* Revision 1.33 1996/08/02 13:20:34 jimz
* get rid of bogus (long) casts
*
* Revision 1.32 1996/07/31 16:56:18 jimz
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
*
* Revision 1.31 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.30 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.29 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.28 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.27 1996/06/11 15:19:57 wvcii
* added include of rf_chaindecluster.h
* fixed parameter list of rf_ConfigureChainDecluster
*
* Revision 1.26 1996/06/11 08:55:15 jimz
* improved error-checking at configuration time
*
* Revision 1.25 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.24 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.23 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.22 1996/06/06 17:31:30 jimz
* use CreateMirrorPartitionReadDAG for mirrored reads
*
* Revision 1.21 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.20 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.19 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.18 1996/05/31 16:13:28 amiri
* removed/added some comments.
*
* Revision 1.17 1996/05/31 05:01:52 amiri
* fixed a bug related to sparing layout.
*
* Revision 1.16 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.15 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.14 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.13 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.12 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.11 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.10 1996/05/03 19:53:56 wvcii
* removed include of rf_redstripe.h
* moved dag creation routines to new dag library
*
*/
#include "rf_archs.h"
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_chaindecluster.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_general.h"
#include "rf_utils.h"
/*
 * Layout-specific state for chained declustering.  One instance is
 * allocated and filled in by rf_ConfigureChainDecluster() and is reached
 * afterwards through raidPtr->Layout.layoutSpecificInfo.
 */
typedef struct RF_ChaindeclusterConfigInfo_s {
RF_RowCol_t **stripeIdentifier; /* numCol x 2 table filled in at config
* time and used by IdentifyStripe:
* entry i holds { i, (i+1) % numCol } */
RF_StripeCount_t numSparingRegions; /* usable stripe units per disk
* divided by (2*numCol - 2) */
RF_StripeCount_t stripeUnitsPerSparingRegion; /* numCol * (numCol - 1) */
RF_SectorNum_t mirrorStripeOffset; /* numSparingRegions * (numCol - 1);
* a count of stripe units (despite
* the sector type) that MapParity
* multiplies by sectorsPerStripeUnit
* to find where mirror copies start */
} RF_ChaindeclusterConfigInfo_t;
/*
 * Configure the chained-declustering layout for an array.
 *
 * Allocates the layout-specific info structure (on the array's cleanup
 * list), builds the stripe-identifier table, and derives the remaining
 * layout parameters (number of stripes, sectors per disk, total sectors).
 *
 * listp and cfgPtr are accepted for interface compatibility and are not
 * used here.
 *
 * Returns 0 on success, EINVAL if the array has fewer than two columns,
 * or ENOMEM if an allocation fails.
 */
int rf_ConfigureChainDecluster(
RF_ShutdownList_t **listp,
RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_StripeCount_t num_used_stripeUnitsPerDisk;
	RF_ChaindeclusterConfigInfo_t *info;
	RF_RowCol_t regionDivisor;
	RF_RowCol_t i;

	/*
	 * The sizing arithmetic below divides by (2*numCol - 2), which is
	 * zero for a single-column array.  Reject such a configuration
	 * before anything is allocated instead of faulting on the division.
	 */
	if (raidPtr->numCol < 2)
		return(EINVAL);
	regionDivisor = 2*raidPtr->numCol-2;

	/* create a Chained Declustering configuration structure */
	RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
	if (info == NULL)
		return(ENOMEM);
	layoutPtr->layoutSpecificInfo = (void *) info;

	/* fill in the config structure. */
	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2 , raidPtr->cleanupList);
	if (info->stripeIdentifier == NULL)
		return(ENOMEM);
	for (i=0; i< raidPtr->numCol; i++) {
		/* primary copy on column i, mirror copy on the next column */
		info->stripeIdentifier[i][0] = i;
		info->stripeIdentifier[i][1] = (i+1) % raidPtr->numCol;
	}

	RF_ASSERT(raidPtr->numRow == 1);

	/*
	 * fill in the remaining layout parameters; only a whole number of
	 * sparing regions is used, so any remainder of the disk is dropped
	 */
	num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk -
	    (layoutPtr->stripeUnitsPerDisk % regionDivisor);
	info->numSparingRegions = num_used_stripeUnitsPerDisk / regionDivisor;
	info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
	info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol-1);

	layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
	layoutPtr->numDataCol = 1;
	layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
	layoutPtr->numParityCol = 1;
	layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;

	raidPtr->sectorsPerDisk =
	    num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
	raidPtr->totalSectors =
	    (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;

	/* record the (possibly reduced) number of stripe units actually used */
	layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
	return(0);
}
/*
 * Return the number of spare reconstruction units provided by this layout:
 * two per sparing region.
 */
RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_ChaindeclusterConfigInfo_t *cdInfo;
	RF_ReconUnitCount_t numSpareRUs;

	cdInfo = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;

	/* each sparing region sets aside two stripe units per disk as spare */
	numSpareRUs = 2*cdInfo->numSparingRegions;
	return (numSpareRUs);
}
/*
 * Map a RAID sector to the primary copy of its data, i.e. the first member
 * of the mirror pair.
 *
 * raidSector  - logical sector address within the array
 * row, col    - (out) disk holding the sector; row is always 0
 * diskSector  - (out) sector offset on that disk
 * remap       - zero: use the normal location; nonzero: use the location
 *               in spare space instead
 */
void rf_MapSectorChainDecluster(
RF_Raid_t *raidPtr,
RF_RaidAddr_t raidSector,
RF_RowCol_t *row,
RF_RowCol_t *col,
RF_SectorNum_t *diskSector,
int remap)
{
	RF_ChaindeclusterConfigInfo_t *cdInfo = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_StripeNum_t suNum = raidSector / layoutPtr->sectorsPerStripeUnit;
	RF_StripeNum_t regionId;
	RF_SectorNum_t suInRegion, suOnDisk;
	int homeCol;

	*row = 0;

	regionId = suNum / cdInfo->stripeUnitsPerSparingRegion;
	suInRegion = suNum % cdInfo->stripeUnitsPerSparingRegion;
	suOnDisk = suInRegion / raidPtr->numCol;
	homeCol = suNum % raidPtr->numCol;

	if (!remap) {
		/* normal case: stripe units are dealt round-robin across columns */
		*col = homeCol;
		*diskSector = ( suOnDisk + ( (raidPtr->numCol-1) * regionId) ) *
		    layoutPtr->sectorsPerStripeUnit;
		*diskSector += (raidSector % layoutPtr->sectorsPerStripeUnit);
		return;
	}

	/* remap sector to spare space: start from the region's spare offset */
	*diskSector = regionId * (raidPtr->numCol+1) * layoutPtr->sectorsPerStripeUnit;
	*diskSector += (raidPtr->numCol-1) * layoutPtr->sectorsPerStripeUnit;
	*diskSector += (raidSector % layoutPtr->sectorsPerStripeUnit);

	/* pick the column that holds the spare copy */
	if (suOnDisk < homeCol) {
		*col = suOnDisk;
	} else if (suOnDisk == raidPtr->numCol-2 ) {
		/* last stripe unit of the region: previous column, second spare unit */
		*col = (homeCol+raidPtr->numCol-1) % raidPtr->numCol;
		*diskSector += layoutPtr->sectorsPerStripeUnit;
	} else {
		*col = (suOnDisk + 2) % raidPtr->numCol;
	}
}
/*
 * Map a RAID sector to the second copy of its mirror pair, which is chain
 * declustered: the copy lives on the disk following (mod numCol) the one
 * that holds the primary copy, offset into the disk by mirrorStripeOffset
 * stripe units.
 *
 * raidSector  - logical sector address within the array
 * row, col    - (out) disk holding the mirror copy; row is always 0
 * diskSector  - (out) sector offset on that disk
 * remap       - zero: use the normal location; nonzero: use the location
 *               in spare space instead
 */
void rf_MapParityChainDecluster(
RF_Raid_t *raidPtr,
RF_RaidAddr_t raidSector,
RF_RowCol_t *row,
RF_RowCol_t *col,
RF_SectorNum_t *diskSector,
int remap)
{
	RF_ChaindeclusterConfigInfo_t *cdInfo = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_StripeNum_t suNum = raidSector / layoutPtr->sectorsPerStripeUnit;
	RF_StripeNum_t regionId;
	RF_SectorNum_t suInRegion, suOnDisk;
	int homeCol;

	*row = 0;

	if (!remap) {
		RF_RowCol_t primaryCol = suNum % raidPtr->numCol;

		/* mirror copy sits one column past the primary copy */
		*col = (primaryCol + 1) % raidPtr->numCol;
		*diskSector = cdInfo->mirrorStripeOffset * layoutPtr->sectorsPerStripeUnit;
		*diskSector += ( suNum / raidPtr->numCol ) * layoutPtr->sectorsPerStripeUnit;
		*diskSector += (raidSector % layoutPtr->sectorsPerStripeUnit);
		return;
	}

	/* remap parity to spare space ... */
	regionId = suNum / cdInfo->stripeUnitsPerSparingRegion;
	suInRegion = suNum % cdInfo->stripeUnitsPerSparingRegion;
	suOnDisk = suInRegion / raidPtr->numCol;

	*diskSector = regionId * (raidPtr->numCol+1) * layoutPtr->sectorsPerStripeUnit;
	*diskSector += (raidPtr->numCol) * layoutPtr->sectorsPerStripeUnit;
	*diskSector += (raidSector % layoutPtr->sectorsPerStripeUnit);

	homeCol = suNum % raidPtr->numCol;
	if (suOnDisk < homeCol) {
		*col = suOnDisk;
	} else if (suOnDisk == raidPtr->numCol-2 ) {
		/* last stripe unit of the region: two columns on, first spare unit */
		*col = (homeCol+2) % raidPtr->numCol;
		*diskSector -= layoutPtr->sectorsPerStripeUnit;
	} else {
		*col = (suOnDisk + 2) % raidPtr->numCol;
	}
}
/*
 * Identify the disks that make up the stripe containing addr.
 *
 * diskids - (out) points at the two-entry column list for the stripe,
 *           taken from the table built at configuration time
 * outRow  - (out) always 0
 */
void rf_IdentifyStripeChainDecluster(
RF_Raid_t *raidPtr,
RF_RaidAddr_t addr,
RF_RowCol_t **diskids,
RF_RowCol_t *outRow)
{
	RF_ChaindeclusterConfigInfo_t *cdInfo;
	RF_StripeNum_t suNum;
	RF_RowCol_t primaryCol;

	cdInfo = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;

	/* the column of the primary copy selects the table entry */
	suNum = addr / raidPtr->Layout.sectorsPerStripeUnit;
	primaryCol = suNum % raidPtr->numCol;

	*outRow = 0;
	*diskids = cdInfo->stripeIdentifier[ primaryCol ];
}
/*
 * Map a stripe ID to a parity stripe ID and reconstruction unit.  For this
 * layout the mapping is the identity and the reconstruction unit is
 * always 0; layoutPtr is not consulted.
 */
void rf_MapSIDToPSIDChainDecluster(
RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID,
RF_StripeNum_t *psID,
RF_ReconUnitNum_t *which_ru)
{
	(void) layoutPtr;

	*which_ru = 0;		/* reconstruction unit is always 0 */
	*psID = stripeID;	/* parity stripe ID == stripe ID */
}
/******************************************************************************
* select a graph to perform a single-stripe access
*
* Parameters: raidPtr - description of the physical array
* type - type of operation (read or write) requested
* asmap - logical & physical addresses for this access
* createFunc - function to use to create the graph (return value);
*              set to NULL when more than one disk in the group
*              has failed
*
* The value stored through createFunc is cast to the generic
* RF_VoidFuncPtr.  According to the disabled prototype previously kept
* here, the creation functions really have the type
*   void (*)(RF_Raid_t *, RF_AccessStripeMap_t *, RF_DagHeader_t *,
*            void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *)
*****************************************************************************/
void rf_RAIDCDagSelect(
RF_Raid_t *raidPtr,
RF_IoType_t type,
RF_AccessStripeMap_t *asmap,
RF_VoidFuncPtr *createFunc)
{
	RF_ASSERT(RF_IO_IS_R_OR_W(type));
	RF_ASSERT(raidPtr->numRow == 1);

	if (asmap->numDataFailed + asmap->numParityFailed > 1) {
		RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
		*createFunc = NULL;
		return;
	}

	/* writes always use the RAID 1 write graph */
	if (type != RF_IO_TYPE_READ) {
		*createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
		return;
	}

	if ( ( raidPtr->status[0] == rf_rs_degraded ) || ( raidPtr->status[0] == rf_rs_reconstructing) )
		*createFunc = (RF_VoidFuncPtr)rf_CreateRaidCDegradedReadDAG; /* array status is degraded, implement workload shifting */
	else
		*createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; /* array status not degraded, so use mirror partition dag */
}

View File

@ -0,0 +1,122 @@
/* $NetBSD: rf_chaindecluster.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_chaindecluster.h
* header file for Chained Declustering
*/
/*
* :
* Log: rf_chaindecluster.h,v
* Revision 1.14 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.13 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.12 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.11 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.10 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.9 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.8 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.7 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.6 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.5 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.4 1996/02/22 16:45:59 amiri
* added declaration of dag selection function
*
* Revision 1.3 1995/12/01 15:16:56 root
* added copyright info
*
* Revision 1.2 1995/11/17 19:55:21 amiri
* prototyped MapParityChainDecluster
*/
#ifndef _RF__RF_CHAINDECLUSTER_H_
#define _RF__RF_CHAINDECLUSTER_H_
/*
 * Set up the chained-declustering layout state for an array.
 * Returns 0 on success or an errno value on failure.
 */
int rf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/* Number of spare reconstruction units provided by the layout. */
RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr);
/*
 * Map a RAID sector to the disk and disk sector of the primary copy;
 * a nonzero remap selects the location in spare space instead.
 */
void rf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/*
 * Map a RAID sector to the disk and disk sector of the second (mirror)
 * copy; a nonzero remap selects the location in spare space instead.
 */
void rf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* Return the list of columns making up the stripe that contains addr. */
void rf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
/* Map a stripe ID to a parity stripe ID and reconstruction unit number. */
void rf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
RF_ReconUnitNum_t *which_ru);
/*
 * Choose the graph-creation function for a single-stripe access; the
 * choice is returned through the last argument as a generic function
 * pointer.
 */
void rf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_AccessStripeMap_t *asmap,
RF_VoidFuncPtr *);
/*
 * Disabled: the fully typed form of the last parameter of
 * rf_RAIDCDagSelect, kept for reference only.
 */
#if 0
void (**createFunc)(RF_Raid_t *,
RF_AccessStripeMap_t *,
RF_DagHeader_t *,
void *,
RF_RaidAccessFlags_t,
RF_AllocListElem_t *)
);
#endif
#endif /* !_RF__RF_CHAINDECLUSTER_H_ */

View File

@ -0,0 +1,126 @@
/* $NetBSD: rf_configure.h,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/********************************
*
* rf_configure.h
*
* header file for raidframe configuration in the kernel version only.
* configuration is invoked via ioctl rather than at boot time
*
*******************************/
/* :
* Log: rf_configure.h,v
* Revision 1.16 1996/06/19 14:57:53 jimz
* move layout-specific config parsing hooks into RF_LayoutSW_t
* table in rf_layout.c
*
* Revision 1.15 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.14 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.13 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.12 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.11 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.10 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.9 1996/05/18 20:09:51 jimz
* bit of cleanup to compile cleanly in kernel, once again
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1995/12/01 15:16:26 root
* added copyright info
*
*/
#ifndef _RF__RF_CONFIGURE_H_
#define _RF__RF_CONFIGURE_H_
#include "rf_archs.h"
#include "rf_types.h"
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
/* The raidframe configuration, passed down through an ioctl.
* The driver can be reconfigured (with total loss of data) at any time,
* but it must be shut down first.
*
* Device and debug-variable names are stored in fixed 50-byte character
* arrays.
*/
struct RF_Config_s {
RF_RowCol_t numRow, numCol, numSpare; /* number of rows, columns, and spare disks */
dev_t devs[RF_MAXROW][RF_MAXCOL]; /* device numbers for disks comprising array */
char devnames[RF_MAXROW][RF_MAXCOL][50]; /* device names, indexed [row][col] like devs */
dev_t spare_devs[RF_MAXSPARE]; /* device numbers for spare disks */
char spare_names[RF_MAXSPARE][50]; /* device names of the spare disks */
RF_SectorNum_t sectPerSU; /* sectors per stripe unit */
RF_StripeNum_t SUsPerPU; /* stripe units per parity unit */
RF_StripeNum_t SUsPerRU; /* stripe units per reconstruction unit */
RF_ParityConfig_t parityConfig; /* identifies the RAID architecture to be used */
RF_DiskQueueType_t diskQueueType; /* 'f' = fifo, 'c' = cvscan, not used in kernel */
char maxOutstandingDiskReqs; /* # concurrent reqs to be sent to a disk. not used in kernel. */
char debugVars[RF_MAXDBGV][50]; /* space for specifying debug variables & their values */
unsigned int layoutSpecificSize; /* size in bytes of layout-specific info */
void *layoutSpecific; /* a pointer to a layout-specific structure to be copied in */
};
/*
 * Configuration helpers that are declared only for non-kernel builds.
 * NOTE(review): these use FILE, so presumably <stdio.h> must be included
 * before this header in user-level code -- confirm.
 */
#ifndef KERNEL
int rf_MakeConfig(char *configname, RF_Config_t *cfgPtr);
int rf_MakeLayoutSpecificNULL(FILE *fp, RF_Config_t *cfgPtr, void *arg);
int rf_MakeLayoutSpecificDeclustered(FILE *configfp, RF_Config_t *cfgPtr, void *arg);
void *rf_ReadSpareTable(RF_SparetWait_t *req, char *fname);
#endif /* !KERNEL */
#endif /* !_RF__RF_CONFIGURE_H_ */

View File

@ -0,0 +1,574 @@
/* $NetBSD: rf_copyback.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* copyback.c -- code to copy reconstructed data back from spare space to
* the replaced disk.
*
* the code operates using callbacks on the I/Os to continue with the next
* unit to be copied back. We do this because a simple loop containing blocking I/Os
* will not work in the simulator.
*
****************************************************************************************/
/*
* :
* Log: rf_copyback.c,v
* Revision 1.26 1996/08/06 22:26:00 jimz
* don't include sys/buf.h on linux
*
* Revision 1.25 1996/07/30 03:30:40 jimz
* include rf_types.h first
*
* Revision 1.24 1996/07/27 18:39:52 jimz
* cleanup sweep
*
* Revision 1.23 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.22 1996/07/11 19:08:00 jimz
* generalize reconstruction mechanism
* allow raid1 reconstructs via copyback (done with array
* quiesced, not online, therefore not disk-directed)
*
* Revision 1.21 1996/07/11 16:03:47 jimz
* fixed hanging bug in rf_CopybackWriteDoneProc()
*
* Revision 1.20 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.19 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.18 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.17 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.16 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.15 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.14 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.13 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.12 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.11 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.10 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.9 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.8 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.7 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.6 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.5 1995/12/01 15:15:31 root
* added copyright info
*
* Revision 1.4 1995/06/23 13:41:36 robby
* updated to prototypes in rf_layout.h
*
*/
#include "rf_types.h"
#include <sys/time.h>
#ifndef LINUX
#include <sys/buf.h>
#endif /* !LINUX */
#include "rf_raid.h"
#include "rf_threadid.h"
#include "rf_mcpair.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_utils.h"
#include "rf_copyback.h"
#if !defined(__NetBSD__)
#include "rf_camlayer.h"
#endif
#include "rf_decluster.h"
#include "rf_driver.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
/*
 * NOTE(review): presumably the values passed as the 'typ' argument of
 * rf_CopybackOne() to say whether a data or a parity unit is being copied
 * back -- confirm against the callers.
 */
#define RF_COPYBACK_DATA 0
#define RF_COPYBACK_PARITY 1
/* global copyback flag; reset to 0 by rf_ConfigureCopyback() */
int rf_copyback_in_progress;
/* file-local helpers; the done-procs take the copyback descriptor and an I/O status */
static int rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status);
static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status);
static void rf_CopybackOne(RF_CopybackDesc_t *desc, int typ,
RF_RaidAddr_t addr, RF_RowCol_t testRow, RF_RowCol_t testCol,
RF_SectorNum_t testOffs);
static void rf_CopybackComplete(RF_CopybackDesc_t *desc, int status);
/*
 * Initialize the copyback module: mark that no copyback is in progress.
 * Nothing is added to the shutdown list, so listp is unused.
 * Always returns 0.
 */
int rf_ConfigureCopyback(listp)
	RF_ShutdownList_t **listp;
{
	(void) listp;

	rf_copyback_in_progress = 0;
	return (0);
}
/*
 * NetBSD kernel build only: system headers needed by the copyback code
 * below, plus a local declaration of raidlookup(), which is defined
 * outside this file.
 */
#if defined(__NetBSD__) && defined(_KERNEL)
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
/* __P() wraps the prototype so it can be dropped for pre-ANSI compilers */
int raidlookup __P((char *, struct proc *, struct vnode **));
#endif
/*
 * Do a complete copyback.
 *
 * Finds the first disk whose status is rf_ds_dist_spared or rf_ds_spared,
 * checks that the component can be used again (NetBSD kernel: close and
 * re-open the device and refresh its size information; elsewhere: issue
 * a SCSI TUR), builds a copyback descriptor, quiesces the array, marks
 * the disk and its row optimal and starts copying via
 * rf_ContinueCopyback().
 *
 * Returns without doing anything (after printing a message) when no disk
 * needs copyback or when the component cannot be brought back.
 */
void rf_CopybackReconstructedData(raidPtr)
	RF_Raid_t *raidPtr;
{
#if defined(__NetBSD__) && defined(_KERNEL)
	int done,retcode;
	RF_CopybackDesc_t *desc;
	RF_RowCol_t frow, fcol;
	RF_RaidDisk_t *badDisk;
	char *databuf;
	struct partinfo dpart;
	struct vnode *vp;
	struct vattr va;
	struct proc *proc;
#else
	int bus, targ, lun, done, retcode;
	RF_CopybackDesc_t *desc;
	RF_RowCol_t frow, fcol;
	RF_RaidDisk_t *badDisk;
	RF_DiskOp_t *tur_op;
	char *databuf;
#endif

	/* locate the first spared disk */
	done = 0;
	fcol = 0;
	for (frow=0; frow<raidPtr->numRow; frow++) {
		for (fcol=0; fcol<raidPtr->numCol; fcol++) {
			if (raidPtr->Disks[frow][fcol].status == rf_ds_dist_spared
			    || raidPtr->Disks[frow][fcol].status == rf_ds_spared)
			{
				done = 1;
				break;
			}
		}
		if (done)
			break;
	}

	if (frow == raidPtr->numRow) {
		printf("COPYBACK: no disks need copyback\n");
		return;
	}

	badDisk = &raidPtr->Disks[frow][fcol];

#ifndef SIMULATE
#if defined(__NetBSD__) && defined(_KERNEL)

	proc = raidPtr->proc;	/* XXX Yes, this is not nice.. */

	/* This device may have been opened successfully the first time.
	   Close it before trying to open it again.. */

	if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
		printf("Closed the open device: %s\n",
		       raidPtr->Disks[frow][fcol].devname);
		(void)vn_close(raidPtr->raid_cinfo[frow][fcol].ci_vp,
			       FREAD|FWRITE, proc->p_ucred, proc);
		/* do not leave a stale vnode pointer behind in case the
		   re-open below fails */
		raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
	}

	printf("About to (re-)open the device: %s\n",
	       raidPtr->Disks[frow][fcol].devname);

	retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);

	if (retcode) {
		printf("COPYBACK: raidlookup on device: %s failed: %d!\n",
		       raidPtr->Disks[frow][fcol].devname, retcode);

		/* XXX the component isn't responding properly...
		   must be still dead :-( */
		return;
	}

	/* Ok, so we can at least do a lookup... How about actually
	   getting a vp for it? */

	if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
		printf("COPYBACK: VOP_GETATTR on device: %s failed: %d!\n",
		       raidPtr->Disks[frow][fcol].devname, retcode);
		/* give the freshly opened vnode back instead of leaking it */
		(void)vn_close(vp, FREAD|FWRITE, proc->p_ucred, proc);
		return;
	}

	retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
			    FREAD, proc->p_ucred, proc);
	if (retcode) {
		printf("COPYBACK: DIOCGPART on device: %s failed: %d!\n",
		       raidPtr->Disks[frow][fcol].devname, retcode);
		(void)vn_close(vp, FREAD|FWRITE, proc->p_ucred, proc);
		return;
	}

	raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;

	raidPtr->Disks[frow][fcol].numBlocks = dpart.part->p_size -
		rf_protectedSectors;

	raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
	raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;

	raidPtr->Disks[frow][fcol].dev = va.va_rdev; /* XXX or the above? */

	/* we allow the user to specify that only a fraction of the
	 * disks should be used this is just for debug: it speeds up
	 * the parity scan
	 */
	raidPtr->Disks[frow][fcol].numBlocks =
		raidPtr->Disks[frow][fcol].numBlocks *
		rf_sizePercentage / 100;
#else
	if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
		printf("COPYBACK: unable to extract bus, target, lun from devname %s\n",
		       badDisk->devname);
		return;
	}

	/* TUR the disk that's marked as bad to be sure that it's actually alive */
	rf_SCSI_AllocTUR(&tur_op);
	retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
	rf_SCSI_FreeDiskOp(tur_op, 0);

	if (retcode) {
		printf("COPYBACK: target disk failed TUR\n");
		return;
	}
#endif
#endif /* !SIMULATE */

	/* get a buffer to hold one SU */
	RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));

	/* create a descriptor */
	RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
	desc->raidPtr = raidPtr;
	desc->status = 0;
	desc->frow = frow;
	desc->fcol = fcol;
	desc->spRow = badDisk->spareRow;
	desc->spCol = badDisk->spareCol;
	desc->stripeAddr = 0;
	desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
	desc->databuf = databuf;
#ifndef SIMULATE
	desc->mcpair = rf_AllocMCPair();
#endif /* !SIMULATE */

	printf("COPYBACK: Quiescing the array\n");
	/* quiesce the array, since we don't want to code support for user accs here */
	rf_SuspendNewRequestsAndWait(raidPtr);

	/* adjust state of the array and of the disks */
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
	raidPtr->status[desc->frow] = rf_rs_optimal;
	rf_copyback_in_progress = 1;	/* debug only */
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	printf("COPYBACK: Beginning\n");
	RF_GETTIME(desc->starttime);
	rf_ContinueCopyback(desc);
}
/*
* invoked via callback after a copyback I/O has completed to
* continue on with the next one
*/
void rf_ContinueCopyback(desc)
RF_CopybackDesc_t *desc;
{
RF_SectorNum_t testOffs, stripeAddr;
RF_Raid_t *raidPtr = desc->raidPtr;
RF_RaidAddr_t addr;
RF_RowCol_t testRow, testCol;
int old_pctg, new_pctg, done;
struct timeval t, diff;
old_pctg = (-1);
while (1) {
stripeAddr = desc->stripeAddr;
if (rf_prReconSched) {
old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
}
desc->stripeAddr += desc->sectPerStripe;
if (rf_prReconSched) {
new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
if (new_pctg != old_pctg) {
RF_GETTIME(t);
RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
printf("%d %d.%06d\n",new_pctg, (int)diff.tv_sec, (int)diff.tv_usec);
}
}
if (stripeAddr >= raidPtr->totalSectors) {
rf_CopybackComplete(desc, 0);
return;
}
/* walk through the current stripe, su-by-su */
for (done=0, addr = stripeAddr; addr < stripeAddr+desc->sectPerStripe; addr += desc->sectPerSU) {
/* map the SU, disallowing remap to spare space */
(raidPtr->Layout.map->MapSector)(raidPtr, addr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
if (testRow == desc->frow && testCol == desc->fcol) {
rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testRow, testCol, testOffs);
#ifdef SIMULATE
return;
#else /* SIMULATE */
done = 1;
break;
#endif /* SIMULATE */
}
}
if (!done) {
/* we didn't find the failed disk in the data part. check parity. */
/* map the parity for this stripe, disallowing remap to spare space */
(raidPtr->Layout.map->MapParity)(raidPtr, stripeAddr, &testRow, &testCol, &testOffs, RF_DONT_REMAP);
if (testRow == desc->frow && testCol == desc->fcol) {
rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testRow, testCol, testOffs);
#ifdef SIMULATE
return;
#endif /* SIMULATE */
}
}
/* check to see if the last read/write pair failed */
if (desc->status) {
rf_CopybackComplete(desc, 1);
return;
}
/* we didn't find any units to copy back in this stripe. Continue with the next one */
}
}
/* copyback one unit */
static void rf_CopybackOne(desc, typ, addr, testRow, testCol, testOffs)
RF_CopybackDesc_t *desc;
int typ;
RF_RaidAddr_t addr;
RF_RowCol_t testRow;
RF_RowCol_t testCol;
RF_SectorNum_t testOffs;
{
RF_SectorCount_t sectPerSU = desc->sectPerSU;
RF_Raid_t *raidPtr = desc->raidPtr;
RF_RowCol_t spRow = desc->spRow;
RF_RowCol_t spCol = desc->spCol;
RF_SectorNum_t spOffs;
/* find the spare spare location for this SU */
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
if (typ == RF_COPYBACK_DATA)
raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
else
raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow, &spCol, &spOffs, RF_REMAP);
} else {
spOffs = testOffs;
}
/* create reqs to read the old location & write the new */
desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
sectPerSU, desc->databuf, 0L, 0,
(int (*)(void *,int)) rf_CopybackReadDoneProc, desc,
NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
sectPerSU, desc->databuf, 0L, 0,
(int (*)(void *,int)) rf_CopybackWriteDoneProc, desc,
NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
desc->frow = testRow;
desc->fcol = testCol;
/* enqueue the read. the write will go out as part of the callback on the read.
* at user-level & in the kernel, wait for the read-write pair to complete.
* in the simulator, just return, since everything will happen as callbacks
*/
#ifndef SIMULATE
RF_LOCK_MUTEX(desc->mcpair->mutex);
desc->mcpair->flag = 0;
#endif /* !SIMULATE */
rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
#ifndef SIMULATE
while (!desc->mcpair->flag) {
RF_WAIT_MCPAIR(desc->mcpair);
}
RF_UNLOCK_MUTEX(desc->mcpair->mutex);
rf_FreeDiskQueueData(desc->readreq);
rf_FreeDiskQueueData(desc->writereq);
#endif /* !SIMULATE */
}
/*
 * Completion callback for the copyback read (called at interrupt
 * context).  On success the matching write is enqueued on the queue of
 * disk (desc->frow, desc->fcol).  On failure the write's completion
 * function is invoked directly with status -100 so the waiter is still
 * notified.  Always returns 0.
 */
static int rf_CopybackReadDoneProc(desc, status)
	RF_CopybackDesc_t *desc;
	int status;
{
	if (!status) {
		/* read is fine: just send out the write */
		rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->frow][desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
		return 0;
	}

	/* invoke the callback with bad status */
	printf("COPYBACK: copyback read failed.  Aborting.\n");
	(desc->writereq->CompleteFunc)(desc, -100);
	return 0;
}
/*
 * Completion callback for the copyback write (called at interrupt
 * context; also called by the read callback with status -100 when the
 * read failed).
 *
 * At user level & in the kernel: record the status in the descriptor
 * and wake up the copyback thread waiting on desc->mcpair.  The
 * diskqueuedata structs can't be freed here in the kernel b/c we're at
 * interrupt context.
 * In the simulator: free both requests and either continue with the
 * next unit or finish the copyback with a failure indication.
 * Always returns 0.
 */
static int rf_CopybackWriteDoneProc(desc, status)
	RF_CopybackDesc_t *desc;
	int status;
{
	/* -100 means the read failed, which has been reported already */
	if (status != 0 && status != -100) {
		printf("COPYBACK: copyback write failed.  Aborting.\n");
	}

#ifndef SIMULATE
	desc->status = status;
	rf_MCPairWakeupFunc(desc->mcpair);
#else /* !SIMULATE */
	rf_FreeDiskQueueData(desc->readreq);
	rf_FreeDiskQueueData(desc->writereq);
	if (status)
		rf_CopybackComplete(desc, 1);
	else
		rf_ContinueCopyback(desc);
#endif /* !SIMULATE */

	return 0;
}
/* invoked when the copyback has completed */
static void rf_CopybackComplete(desc, status)
RF_CopybackDesc_t *desc;
int status;
{
RF_Raid_t *raidPtr = desc->raidPtr;
struct timeval t, diff;
if (!status) {
RF_LOCK_MUTEX(raidPtr->mutex);
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
rf_FreeSpareTable(raidPtr);
} else {
raidPtr->Disks[desc->spRow][desc->spCol].status = rf_ds_spare;
}
RF_UNLOCK_MUTEX(raidPtr->mutex);
RF_GETTIME(t);
RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
printf("Copyback time was %d.%06d seconds\n",
(int)diff.tv_sec, (int)diff.tv_usec);
} else printf("COPYBACK: Failure.\n");
RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
#ifndef SIMULATE
rf_FreeMCPair(desc->mcpair);
#endif /* !SIMULATE */
RF_Free(desc, sizeof(*desc));
rf_copyback_in_progress = 0;
rf_ResumeNewRequests(raidPtr);
}

View File

@ -0,0 +1,87 @@
/* $NetBSD: rf_copyback.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* rf_copyback.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_copyback.h,v
* Revision 1.5 1996/07/11 19:08:00 jimz
* generalize reconstruction mechanism
* allow raid1 reconstructs via copyback (done with array
* quiesced, not online, therefore not disk-directed)
*
* Revision 1.4 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.3 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/18 19:55:02 jimz
* Initial revision
*
*/
#ifndef _RF__RF_COPYBACK_H_
#define _RF__RF_COPYBACK_H_
#include "rf_types.h"
/*
 * State of one copyback operation.  Allocated and filled in by
 * rf_CopybackReconstructedData() and freed by the copyback code once
 * the operation completes or fails.
 */
typedef struct RF_CopybackDesc_s {
RF_Raid_t *raidPtr; /* array the copyback runs on */
RF_RowCol_t frow; /* row of the disk being copied back to */
RF_RowCol_t fcol; /* column of the disk being copied back to */
RF_RowCol_t spRow; /* row of the spare (from the failed disk's spareRow) */
RF_RowCol_t spCol; /* column of the spare (from the failed disk's spareCol) */
int status; /* status of the last read/write pair; non-zero aborts the copyback */
RF_StripeNum_t stripeAddr; /* address of the next stripe to examine */
RF_SectorCount_t sectPerSU; /* sectors per stripe unit */
RF_SectorCount_t sectPerStripe; /* sectorsPerStripeUnit * numDataCol */
char *databuf; /* buffer holding one stripe unit */
RF_DiskQueueData_t *readreq; /* current read request (from the spare location) */
RF_DiskQueueData_t *writereq; /* current write request (to the replaced disk) */
struct timeval starttime; /* time the copyback began, for progress/elapsed reports */
#ifndef SIMULATE
RF_MCPair_t *mcpair; /* used to wait for each read/write pair to complete */
#endif /* !SIMULATE */
} RF_CopybackDesc_t;
extern int rf_copyback_in_progress;
int rf_ConfigureCopyback(RF_ShutdownList_t **listp);
void rf_CopybackReconstructedData(RF_Raid_t *raidPtr);
void rf_ContinueCopyback(RF_CopybackDesc_t *desc);
#endif /* !_RF__RF_COPYBACK_H_ */

View File

@ -0,0 +1,194 @@
/* $NetBSD: rf_cpuutil.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Mark Holland, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_cpuutil.c
*
* track cpu utilization
*/
#ifdef _KERNEL
#define KERNEL
#endif
#include "rf_cpuutil.h"
#ifndef KERNEL
#include <errno.h>
#endif /* !KERNEL */
#include "rf_types.h"
#include "rf_general.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
#ifdef __osf__
#include <sys/table.h>
#endif /* __osf__ */
#ifdef AIX
#include <nlist.h>
#include <sys/sysinfo.h>
#endif /* AIX */
#ifdef KERNEL
#ifndef __NetBSD__
#include <sys/dk.h>
#endif /* __NetBSD__ */
#else /* KERNEL */
extern int table(int id, int index, void *addr, int nel, u_int lel);
#endif /* KERNEL */
#ifdef __osf__
static struct tbl_sysinfo start, stop;
#endif /* __osf__ */
#ifdef AIX
static int kmem_fd;
static off_t sysinfo_offset;
static struct sysinfo sysinfo_start, sysinfo_stop;
static struct nlist namelist[] = {
{{"sysinfo"}},
{{""}},
};
#endif /* AIX */
#ifdef AIX
/*
 * Shutdown-list callback (AIX only): closes the descriptor opened on
 * /dev/kmem by rf_ConfigureCpuMonitor().  The argument is unused and
 * the result of close() is deliberately ignored.
 */
static void rf_ShutdownCpuMonitor(ignored)
	void *ignored;
{
	(void) close(kmem_fd);
}
#endif /* AIX */
/*
 * Set up the CPU utilization monitor.
 *
 * On AIX: look up the kernel "sysinfo" symbol, open /dev/kmem and
 * register rf_ShutdownCpuMonitor() on the shutdown list.  On every
 * other platform there is nothing to configure.
 *
 * Returns 0 on success, otherwise an errno value (or the error returned
 * by rf_ShutdownCreate()).
 */
int rf_ConfigureCpuMonitor(listp)
	RF_ShutdownList_t **listp;
{
#ifdef AIX
	int rc;
	int saved_errno;

	rc = knlist(namelist, 1, sizeof(struct nlist));
	if (rc) {
		/* capture errno before the message routine can overwrite it */
		saved_errno = errno;
		RF_ERRORMSG("Could not knlist() to config CPU monitor\n");
		return(saved_errno);
	}
	if (namelist[0].n_value == 0) {
		RF_ERRORMSG("Got bogus results from knlist() for CPU monitor\n");
		return(EIO);
	}
	sysinfo_offset = namelist[0].n_value;
	kmem_fd = open("/dev/kmem", O_RDONLY);
	if (kmem_fd < 0) {
		/* perror() may itself change errno; report the open() error */
		saved_errno = errno;
		perror("/dev/kmem");
		return(saved_errno);
	}
	rc = rf_ShutdownCreate(listp, rf_ShutdownCpuMonitor, NULL);
	if (rc) {
		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
			__LINE__, rc);
		/* undo the open() ourselves since the shutdown list won't */
		rf_ShutdownCpuMonitor(NULL);
		return(rc);
	}
#endif /* AIX */
	return(0);
}
/*
 * Record the "start" sample for the CPU utilization monitor.
 *
 * OSF/1 user level: table(TBL_SYSINFO) into `start'.
 * OSF/1 kernel: sampling is not implemented (code below is disabled).
 * AIX: read struct sysinfo from /dev/kmem into `sysinfo_start'.
 * Elsewhere this is a no-op.  Failures are reported but not returned.
 */
void rf_start_cpu_monitor()
{
#ifdef __osf__
#ifndef KERNEL
	if (table(TBL_SYSINFO, 0, &start, 1, sizeof(start)) != 1) {
		printf("Unable to get sysinfo for cpu utilization monitor\n");
		perror("start_cpu_monitor");
	}
#else /* !KERNEL */
	/* start.si_user = cp_time[CP_USER];
	start.si_nice = cp_time[CP_NICE];
	start.si_sys  = cp_time[CP_SYS];
	start.si_idle = cp_time[CP_IDLE];
	start.wait    = cp_time[CP_WAIT]; */
#endif /* !KERNEL */
#endif /* __osf__ */
#ifdef AIX
	off_t off;
	int rc;

	off = lseek(kmem_fd, sysinfo_offset, SEEK_SET);
	RF_ASSERT(off == sysinfo_offset);
	rc = read(kmem_fd, &sysinfo_start, sizeof(struct sysinfo));
	if (rc != sizeof(struct sysinfo)) {
		/* sizeof yields a size_t; convert it to match the %d conversion */
		RF_ERRORMSG2("Starting CPU monitor: rc=%d != %d\n", rc,
			(int)sizeof(struct sysinfo));
	}
#endif /* AIX */
}
/*
 * Record the "stop" sample for the CPU utilization monitor.
 *
 * OSF/1 user level: table(TBL_SYSINFO) into `stop'.
 * OSF/1 kernel: sampling is not implemented (code below is disabled).
 * AIX: read struct sysinfo from /dev/kmem into `sysinfo_stop'.
 * Elsewhere this is a no-op.  Failures are reported but not returned.
 */
void rf_stop_cpu_monitor()
{
#ifdef __osf__
#ifndef KERNEL
	if (table(TBL_SYSINFO, 0, &stop, 1, sizeof(stop)) != 1) {
		printf("Unable to get sysinfo for cpu utilization monitor\n");
		perror("stop_cpu_monitor");
	}
#else /* !KERNEL */
	/* stop.si_user = cp_time[CP_USER];
	stop.si_nice = cp_time[CP_NICE];
	stop.si_sys  = cp_time[CP_SYS];
	stop.si_idle = cp_time[CP_IDLE];
	stop.wait    = cp_time[CP_WAIT]; */
#endif /* !KERNEL */
#endif /* __osf__ */
#ifdef AIX
	off_t off;
	int rc;

	off = lseek(kmem_fd, sysinfo_offset, SEEK_SET);
	RF_ASSERT(off == sysinfo_offset);
	rc = read(kmem_fd, &sysinfo_stop, sizeof(struct sysinfo));
	if (rc != sizeof(struct sysinfo)) {
		/* sizeof yields a size_t; convert it to match the %d conversion */
		RF_ERRORMSG2("Stopping CPU monitor: rc=%d != %d\n", rc,
			(int)sizeof(struct sysinfo));
	}
#endif /* AIX */
}
/*
 * Print the CPU utilization observed between rf_start_cpu_monitor()
 * and rf_stop_cpu_monitor().
 *
 *   s  label naming the interval; only used in the printed message
 *
 * OSF/1: prints the busy percentage (idle and wait ticks both count as
 * idle).  AIX: prints the raw number of idle ticks.  Elsewhere this is
 * a no-op.
 */
void rf_print_cpu_util(s)
	char *s;
{
#ifdef __osf__
	long totalticks, idleticks;

	idleticks = stop.si_idle - start.si_idle + stop.wait - start.wait;
	totalticks = stop.si_user - start.si_user + stop.si_nice - start.si_nice +
		stop.si_sys - start.si_sys + idleticks;
	if (totalticks == 0) {
		/* both samples are identical: avoid dividing by zero */
		printf("CPU utilization during %s is unknown (no ticks elapsed)\n", s);
	} else {
		printf("CPU utilization during %s was %ld %%\n", s, 100 - 100*idleticks/totalticks);
	}
#endif /* __osf__ */
#ifdef AIX
	long idle;

	/* XXX compute a percentage here */
	idle = (long)(sysinfo_stop.cpu[CPU_IDLE] - sysinfo_start.cpu[CPU_IDLE]);
	printf("%ld idle ticks during %s.\n", idle, s);
#endif /* AIX */
}

View File

@ -0,0 +1,56 @@
/* $NetBSD: rf_cpuutil.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* rf_cpuutil.h
*/
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_cpuutil.h,v
* Revision 1.3 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.2 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.1 1996/05/18 19:55:29 jimz
* Initial revision
*
*/
#ifndef _RF__RF_CPUUTIL_H_
#define _RF__RF_CPUUTIL_H_
#include "rf_types.h"
int rf_ConfigureCpuMonitor(RF_ShutdownList_t **listp);
void rf_start_cpu_monitor(void);
void rf_stop_cpu_monitor(void);
void rf_print_cpu_util(char *s);
#endif /* !_RF__RF_CPUUTIL_H_ */

View File

@ -0,0 +1,449 @@
/* $NetBSD: rf_cvscan.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*******************************************************************************
*
* cvscan.c -- prioritized cvscan disk queueing code.
*
* Nov 9, 1994, adapted from raidSim version (MCH)
*
******************************************************************************/
/*
* :
* Log: rf_cvscan.c,v
* Revision 1.6 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.5 1996/07/15 17:22:18 jimz
* nit-pick code cleanup
* resolve stdlib problems on DEC OSF
*
* Revision 1.4 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.3 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.2 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.1 1996/06/05 19:17:40 jimz
* Initial revision
*
*/
#include "rf_types.h"
#include "rf_alloclist.h"
#include "rf_stripelocks.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_cvscan.h"
#include "rf_debugMem.h"
#include "rf_general.h"
#include "rf_sys.h"
#define DO_CHECK_STATE(_hdr_) CheckCvscanState((_hdr_), __FILE__, __LINE__)
#define pri_ok(p) ( ((p) == RF_IO_NORMAL_PRIORITY) || ((p) == RF_IO_LOW_PRIORITY))
/*
 * Consistency check of a CVSCAN queue header (used via DO_CHECK_STATE).
 * Asserts that
 *  - the left list holds only requests below cur_block, in
 *    non-increasing sectorOffset order, all at nxt_priority, and that
 *    its length equals left_cnt;
 *  - the right list is in non-decreasing sectorOffset order starting at
 *    cur_block, all at nxt_priority, and its length equals right_cnt;
 *  - the burner list is in non-increasing priority order and every
 *    entry is strictly below nxt_priority.
 * The file and line arguments are not used in the body.
 */
static void CheckCvscanState(RF_CvscanHeader_t *hdr, char *file, int line)
{
	long i, key;
	RF_DiskQueueData_t *tmp;

	if (hdr->left != NULL) {
		RF_ASSERT( hdr->left->sectorOffset < hdr->cur_block );
	}

	/* left list */
	key = hdr->cur_block;
	i = 0;
	tmp = hdr->left;
	while (tmp != NULL) {
		RF_ASSERT( tmp->sectorOffset <= key
		    && tmp->priority == hdr->nxt_priority && pri_ok(tmp->priority) );
		key = tmp->sectorOffset;
		i++;
		tmp = tmp->next;
	}
	RF_ASSERT( i == hdr->left_cnt );

	/* right list */
	key = hdr->cur_block;
	i = 0;
	tmp = hdr->right;
	while (tmp != NULL) {
		RF_ASSERT(key <= tmp->sectorOffset);
		RF_ASSERT(tmp->priority == hdr->nxt_priority);
		RF_ASSERT(pri_ok(tmp->priority));
		key = tmp->sectorOffset;
		i++;
		tmp = tmp->next;
	}
	RF_ASSERT( i == hdr->right_cnt );

	/* back burner */
	key = hdr->nxt_priority-1;
	tmp = hdr->burner;
	while (tmp != NULL) {
		RF_ASSERT(tmp);
		RF_ASSERT(hdr);
		RF_ASSERT(pri_ok(tmp->priority));
		RF_ASSERT(key >= tmp->priority);
		RF_ASSERT(tmp->priority < hdr->nxt_priority);
		key = tmp->priority;
		tmp = tmp->next;
	}
}
/*
 * Insert req into the list whose head pointer is *list_ptr, keeping
 * the list in non-increasing priority order.  req goes in front of the
 * first entry whose priority is not greater than its own.
 */
static void PriorityInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req )
{
	RF_DiskQueueData_t **link = list_ptr;

	/* skip every entry with a strictly higher priority */
	while (*link != NULL && (*link)->priority > req->priority) {
		link = &(*link)->next;
	}

	req->next = *link;
	*link = req;
}
/*
 * Insert req into the list whose head pointer is *list_ptr, ordered by
 * sectorOffset:
 *   rf_cvscan_RIGHT  ascending; req goes behind entries with an equal
 *                    offset
 *   rf_cvscan_LEFT   descending; req goes in front of entries with an
 *                    equal offset
 * For any other value of order req is placed at the head of the list.
 */
static void ReqInsert(RF_DiskQueueData_t **list_ptr, RF_DiskQueueData_t *req, RF_CvscanArmDir_t order)
{
	RF_DiskQueueData_t **link = list_ptr;
	RF_DiskQueueData_t *cur;

	for (;;) {
		cur = *link;
		if (cur == NULL)
			break;
		if (order == rf_cvscan_RIGHT) {
			if (cur->sectorOffset > req->sectorOffset)
				break;
		} else if (order == rf_cvscan_LEFT) {
			if (cur->sectorOffset <= req->sectorOffset)
				break;
		} else {
			break;
		}
		link = &cur->next;
	}

	req->next = *link;
	*link = req;
}
/*
 * Unlink and return the first entry of the list whose head pointer is
 * *list_ptr.  Returns NULL (and leaves the list alone) when it is empty.
 */
static RF_DiskQueueData_t *ReqDequeue(RF_DiskQueueData_t **list_ptr)
{
	RF_DiskQueueData_t *head = *list_ptr;

	if (head == NULL)
		return NULL;

	*list_ptr = head->next;
	return head;
}
/*
 * After cur_block has moved, shift every request at the front of the
 * right list that now lies below cur_block over to the left list,
 * adjusting both counters.
 */
static void ReBalance(RF_CvscanHeader_t *hdr)
{
	RF_DiskQueueData_t *first;

	for (;;) {
		first = hdr->right;
		if (first == NULL || first->sectorOffset >= hdr->cur_block)
			break;

		hdr->right_cnt--;
		hdr->left_cnt++;

		/* unlink from the right list, then file it on the left */
		hdr->right = first->next;
		ReqInsert( &hdr->left, first, rf_cvscan_LEFT );
	}
}
/*
 * Move every entry of the list at *from_list_ptr into the priority
 * ordered list at *to_list_ptr and leave the source list empty.
 */
static void Transfer(RF_DiskQueueData_t **to_list_ptr, RF_DiskQueueData_t **from_list_ptr )
{
	RF_DiskQueueData_t *cur = *from_list_ptr;
	RF_DiskQueueData_t *following;

	while (cur != NULL) {
		/* PriorityInsert() rewrites cur->next, so remember it first */
		following = cur->next;
		PriorityInsert( to_list_ptr, cur );
		cur = following;
	}

	*from_list_ptr = NULL;
}
/*
 * Add req to the queue described by hdr.
 *
 *  - With both active lists empty, req's priority becomes the active
 *    priority.
 *  - A request above the active priority pushes everything currently in
 *    the left and right lists onto the back burner and makes its own
 *    priority the active one.
 *  - A request below the active priority goes onto the back burner.
 *  - Otherwise it is filed in the left or right list depending on which
 *    side of cur_block it falls.
 */
static void RealEnqueue(RF_CvscanHeader_t *hdr, RF_DiskQueueData_t *req)
{
	RF_ASSERT(req->priority == RF_IO_NORMAL_PRIORITY || req->priority == RF_IO_LOW_PRIORITY);
	DO_CHECK_STATE(hdr);

	if (hdr->left_cnt == 0 && hdr->right_cnt == 0) {
		hdr->nxt_priority = req->priority;
	}

	if (req->priority > hdr->nxt_priority) {
		/* dump all other outstanding requests on the back burner */
		Transfer( &hdr->burner, &hdr->left );
		Transfer( &hdr->burner, &hdr->right );
		hdr->left_cnt = 0;
		hdr->right_cnt = 0;
		hdr->nxt_priority = req->priority;
	}

	if (req->priority < hdr->nxt_priority) {
		/* yet another low priority task! */
		PriorityInsert( &hdr->burner, req );
	} else if (req->sectorOffset < hdr->cur_block) {
		/* this request is to the left of the current arms */
		hdr->left_cnt++;
		ReqInsert( &hdr->left, req, rf_cvscan_LEFT );
	} else {
		/* this request is to the right of the current arms */
		hdr->right_cnt++;
		ReqInsert( &hdr->right, req, rf_cvscan_RIGHT );
	}

	DO_CHECK_STATE(hdr);
}
/*
 * Queue-interface entry point: enqueue elem on the CVSCAN queue q_in.
 * The priority argument is not consulted here; RealEnqueue() uses
 * elem->priority.
 */
void rf_CvscanEnqueue(void *q_in, RF_DiskQueueData_t *elem, int priority)
{
	RealEnqueue( (RF_CvscanHeader_t *) q_in, elem );
}
/*
 * Remove and return the next request to service, or NULL when neither
 * active list holds a request.
 *
 * The side is chosen by summing the distances from cur_block to the
 * nearest `range' requests on each side, where range is the minimum of
 * range_for_avg, left_cnt and right_cnt.  The side opposite to the
 * current direction is charged range * change_penalty.  The left side
 * wins when the right list is empty or its sum is strictly smaller.
 * cur_block then moves to the end of the chosen request and the lists
 * are rebalanced.
 *
 * When both active lists drain, the highest-priority run at the front
 * of the back burner is re-enqueued as the new active set.
 */
RF_DiskQueueData_t *rf_CvscanDequeue(void *q_in)
{
	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
	long range, i, sum_dist_left, sum_dist_right;
	RF_DiskQueueData_t *ret;
	RF_DiskQueueData_t *tmp;

	DO_CHECK_STATE(hdr);

	if( hdr->left_cnt == 0 && hdr->right_cnt == 0 ) return( (RF_DiskQueueData_t *) NULL );

	range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt));
	for( i=0, tmp=hdr->left, sum_dist_left=
		((hdr->direction==rf_cvscan_RIGHT)?range*hdr->change_penalty:0);
		tmp != (RF_DiskQueueData_t *) NULL && i < range;
		tmp = tmp->next, i++ ) {
		sum_dist_left += hdr->cur_block - tmp->sectorOffset;
	}
	for( i=0, tmp=hdr->right, sum_dist_right=
		((hdr->direction==rf_cvscan_LEFT)?range*hdr->change_penalty:0);
		tmp != (RF_DiskQueueData_t *) NULL && i < range;
		tmp = tmp->next, i++ ) {
		sum_dist_right += tmp->sectorOffset - hdr->cur_block;
	}

	if( hdr->right_cnt == 0 || sum_dist_left < sum_dist_right ) {
		hdr->direction = rf_cvscan_LEFT;
		hdr->cur_block = hdr->left->sectorOffset + hdr->left->numSector;
		hdr->left_cnt = RF_MAX(hdr->left_cnt-1,0);
		ret = ReqDequeue(&hdr->left);
	} else {
		hdr->direction = rf_cvscan_RIGHT;
		hdr->cur_block = hdr->right->sectorOffset + hdr->right->numSector;
		hdr->right_cnt = RF_MAX(hdr->right_cnt-1,0);
		ret = ReqDequeue(&hdr->right);
	}

	ReBalance( hdr );

	if( hdr->left_cnt == 0 && hdr->right_cnt == 0
		&& hdr->burner != (RF_DiskQueueData_t *) NULL ) {
		/*
		** restore low priority requests for next dequeue
		**
		** Detach the whole run of equal-priority entries from the
		** burner list *before* re-enqueueing them.  RealEnqueue()
		** checks the queue state, and that check requires every
		** entry still on the burner to be strictly below
		** nxt_priority.
		*/
		RF_DiskQueueData_t *run = hdr->burner;
		RF_DiskQueueData_t *rest = run;
		RF_DiskQueueData_t *next;

		hdr->nxt_priority = run->priority;
		while( rest != (RF_DiskQueueData_t *) NULL
			&& rest->priority == hdr->nxt_priority ) {
			rest = rest->next;
		}
		hdr->burner = rest;

		while( run != rest ) {
			/* RealEnqueue() rewrites run->next, so save it first */
			next = run->next;
			RealEnqueue( hdr, run );
			run = next;
		}
	}

	DO_CHECK_STATE(hdr);
	return( ret );
}
/*
 * Return, without removing it, the request at the head of the list
 * that the side-selection computation below picks, or NULL when
 * neither active list holds a request.  The queue is not modified.
 */
RF_DiskQueueData_t *rf_CvscanPeek(void *q_in)
{
	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
	long range, i, sum_dist_left, sum_dist_right;
	RF_DiskQueueData_t *tmp;

	DO_CHECK_STATE(hdr);

	if (hdr->left_cnt == 0 && hdr->right_cnt == 0)
		return NULL;

	range = RF_MIN( hdr->range_for_avg, RF_MIN(hdr->left_cnt,hdr->right_cnt));

	/* cost of going left: penalized if the arm is currently moving right */
	sum_dist_left = (hdr->direction==rf_cvscan_RIGHT) ? range*hdr->change_penalty : 0;
	i = 0;
	tmp = hdr->left;
	while (tmp != NULL && i < range) {
		sum_dist_left += hdr->cur_block - tmp->sectorOffset;
		tmp = tmp->next;
		i++;
	}

	/* cost of going right: penalized if the arm is currently moving left */
	sum_dist_right = (hdr->direction==rf_cvscan_LEFT) ? range*hdr->change_penalty : 0;
	i = 0;
	tmp = hdr->right;
	while (tmp != NULL && i < range) {
		sum_dist_right += tmp->sectorOffset - hdr->cur_block;
		tmp = tmp->next;
		i++;
	}

	if (hdr->right_cnt == 0 || sum_dist_left < sum_dist_right)
		return hdr->left;
	return hdr->right;
}
/*
** CVSCAN( 1, 0 ) is Shortest Seek Time First (SSTF)
** lowest average response time
** CVSCAN( 1, infinity ) is SCAN
** lowest response time standard deviation
*/
/* Global configuration hook for the CVSCAN queue type; always returns 0. */
int rf_CvscanConfigure()
{
	return 0;
}
/*
 * Allocate and initialize an empty CVSCAN queue header.
 *
 *   sectPerDisk  used to derive the direction-change penalty
 *                (sectPerDisk / 5)
 *   clList       allocation list the header is added to
 *   listp        not used by this routine
 *
 * The averaging range is fixed at 2.  Returns the header as an opaque
 * pointer.
 */
void *rf_CvscanCreate(RF_SectorCount_t sectPerDisk,
	RF_AllocListElem_t *clList,
	RF_ShutdownList_t **listp)
{
	RF_CvscanHeader_t *hdr;
	long range = 2;			/* Currently no mechanism to change these */
	long penalty = sectPerDisk / 5;

	RF_MallocAndAdd(hdr, sizeof(RF_CvscanHeader_t), (RF_CvscanHeader_t *), clList);
	bzero((char *)hdr, sizeof(RF_CvscanHeader_t));

	/* CVSCAN parameters */
	hdr->range_for_avg = RF_MAX( range, 1 );
	hdr->change_penalty = RF_MAX( penalty, 0 );

	/* arm at block 0, heading right */
	hdr->direction = rf_cvscan_RIGHT;
	hdr->cur_block = 0;

	/* all three lists start out empty */
	hdr->right_cnt = 0;
	hdr->left_cnt = 0;
	hdr->right = NULL;
	hdr->left = NULL;
	hdr->burner = NULL;

	DO_CHECK_STATE(hdr);

	return (void *) hdr;
}
#if defined(__NetBSD__) && defined(_KERNEL)
/* PrintCvscanQueue is not used, so we ignore it... */
#else
static void PrintCvscanQueue(RF_CvscanHeader_t *hdr)
{
RF_DiskQueueData_t *tmp;
printf( "CVSCAN(%d,%d) at %d going %s\n",
(int)hdr->range_for_avg,
(int)hdr->change_penalty,
(int)hdr->cur_block,
(hdr->direction==rf_cvscan_LEFT)?"LEFT":"RIGHT" );
printf( "\tLeft(%d): ", hdr->left_cnt );
for( tmp = hdr->left; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
printf( "(%d,%ld,%d) ",
(int) tmp->sectorOffset,
(long) (tmp->sectorOffset + tmp->numSector),
tmp->priority );
printf( "\n" );
printf( "\tRight(%d): ", hdr->right_cnt );
for( tmp = hdr->right; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
printf( "(%d,%ld,%d) ",
(int) tmp->sectorOffset,
(long) (tmp->sectorOffset + tmp->numSector),
tmp->priority );
printf( "\n" );
printf( "\tBurner: " );
for( tmp = hdr->burner; tmp != (RF_DiskQueueData_t *) NULL; tmp = tmp->next)
printf( "(%d,%ld,%d) ",
(int) tmp->sectorOffset,
(long) (tmp->sectorOffset + tmp->numSector),
tmp->priority );
printf( "\n" );
}
#endif
/*
 * Promote the back-burner accesses that belong to the given parity
 * stripe and reconstruction unit to normal priority.
 *
 * Matching entries are unlinked from the burner list, given
 * RF_IO_NORMAL_PRIORITY and collected on a temporary list, then
 * re-enqueued through RealEnqueue().  Returns the number of accesses
 * promoted; the code asserts that this is 0 or 1.
 */
int rf_CvscanPromote(void *q_in, RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru)
{
	RF_CvscanHeader_t *hdr = (RF_CvscanHeader_t *) q_in;
	RF_DiskQueueData_t **link, *cur, *tlist = NULL;
	int retval=0;

	DO_CHECK_STATE(hdr);

	/* one pass over the burner list, unlinking matches through the
	 * pointer that references them */
	link = &hdr->burner;
	while ((cur = *link) != NULL) {
		if (cur->parityStripeID == parityStripeID && cur->which_ru == which_ru) {
			*link = cur->next;
			cur->priority = RF_IO_NORMAL_PRIORITY;
			cur->next = tlist;		/* insert on a temp queue */
			tlist = cur;
		} else {
			link = &cur->next;
		}
	}

	/* re-enqueue everything that was promoted */
	while (tlist != NULL) {
		cur = tlist->next;
		retval++;
		RealEnqueue(hdr, tlist);
		tlist = cur;
	}

	RF_ASSERT(retval==0 || retval==1);
	DO_CHECK_STATE(hdr);
	return retval;
}

View File

@ -0,0 +1,96 @@
/* $NetBSD: rf_cvscan.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
** Disk scheduling by CVSCAN( N, r )
**
** Given a set of requests, partition them into one set on each
** side of the current arm position. The trick is to pick which
** side you are going to service next; once a side is picked you will
** service the closest request.
** Let there be n1 requests on one side and n2 requests on the other
** side. If one of n1 or n2 is zero, select the other side.
** If both n1 and n2 are nonzero, select a "range" for examination
** that is N' = min( n1, n2, N ). Average the distance from the
** current position to the nearest N' requests on each side giving
** d1 and d2.
** Suppose the last decision was to move toward set 2, then the
** current direction is toward set 2, and you will only switch to set
** 1 if d1+R < d2 where R is r*(total number of cylinders), r in [0,1].
**
** I extend this by applying only to the set of requests that all
** share the same, highest priority level.
*/
/* :
* Log: rf_cvscan.h,v
* Revision 1.3 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.2 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.1 1996/06/05 19:17:40 jimz
* Initial revision
*
*/
#ifndef _RF__RF_CVSCAN_H_
#define _RF__RF_CVSCAN_H_
#include "rf_diskqueue.h"
/*
 * Arm direction used by the CVSCAN scheduler; this is the type of the
 * `direction` field of RF_CvscanHeader_t.
 * NOTE(review): presumably LEFT/RIGHT pair up with that header's
 * `left`/`right` request lists -- confirm in rf_cvscan.c.
 */
typedef enum RF_CvscanArmDir_e {
rf_cvscan_LEFT,
rf_cvscan_RIGHT
} RF_CvscanArmDir_t;
/*
 * Per-queue state for the CVSCAN( N, r ) disk scheduler described at the
 * top of this header.
 * NOTE(review): only the two CVSCAN parameters were annotated by the
 * original author; the remaining field notes are inferred from the field
 * names and the algorithm description and should be confirmed against
 * rf_cvscan.c.
 */
typedef struct RF_CvscanHeader_s {
long range_for_avg; /* CVSCAN param N */
long change_penalty; /* CVSCAN param R */
RF_CvscanArmDir_t direction; /* presumably the current arm direction -- TODO confirm */
RF_SectorNum_t cur_block; /* presumably the current arm position -- TODO confirm */
int nxt_priority; /* presumably the priority level being serviced -- TODO confirm */
RF_DiskQueueData_t *left; /* presumably requests on one side of cur_block -- TODO confirm */
int left_cnt; /* presumably the number of requests on `left` -- TODO confirm */
RF_DiskQueueData_t *right; /* presumably requests on the other side of cur_block -- TODO confirm */
int right_cnt; /* presumably the number of requests on `right` -- TODO confirm */
RF_DiskQueueData_t *burner; /* NOTE(review): purpose not evident from this header */
} RF_CvscanHeader_t;
/*
 * Entry points of the CVSCAN disk-queue implementation.
 * The queue is handled through an untyped pointer: rf_CvscanCreate()
 * returns a void *, and every other operation takes a void *qptr.
 * NOTE(review): presumably qptr is the value returned by rf_CvscanCreate()
 * and points at an RF_CvscanHeader_t -- confirm in rf_cvscan.c.
 */
int rf_CvscanConfigure(void);
void *rf_CvscanCreate(RF_SectorCount_t sect_per_disk,
RF_AllocListElem_t *cl_list, RF_ShutdownList_t **listp);
void rf_CvscanEnqueue(void *qptr, RF_DiskQueueData_t *req, int priority);
RF_DiskQueueData_t *rf_CvscanDequeue(void *qptr);
RF_DiskQueueData_t *rf_CvscanPeek(void *qptr);
int rf_CvscanPromote(void *qptr, RF_StripeNum_t parityStripeID,
RF_ReconUnitNum_t which_ru);
#endif /* !_RF__RF_CVSCAN_H_ */

319
sys/dev/raidframe/rf_dag.h Normal file
View File

@ -0,0 +1,319 @@
/* $NetBSD: rf_dag.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II, Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/****************************************************************************
* *
* dag.h -- header file for DAG-related data structures *
* *
****************************************************************************/
/*
*
* :
* Log: rf_dag.h,v
* Revision 1.35 1996/11/05 18:38:37 jimz
* add patch from galvarez@cs.ucsd.edu (Guillermo Alvarez)
* to fix dag_params memory-sizing problem (should be an array
* of the type, not an array of pointers to the type)
*
* Revision 1.34 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.33 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.32 1996/06/10 22:22:13 wvcii
* added two node status types for use in backward error
* recovery experiments.
*
* Revision 1.31 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.30 1996/06/07 22:49:18 jimz
* fix up raidPtr typing
*
* Revision 1.29 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.28 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.27 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.26 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.25 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.24 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.23 1996/05/16 23:05:20 jimz
* Added dag_ptrs field, RF_DAG_PTRCACHESIZE
*
* The dag_ptrs field of the node is basically some scribble
* space to be used here. We could get rid of it, and always
* allocate the range of pointers, but that's expensive. So,
* we pick a "common case" size for the pointer cache. Hopefully,
* we'll find that:
* (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by
* only a little bit (least efficient case)
* (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE
* (wasted memory)
*
* Revision 1.22 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.21 1996/05/08 15:23:47 wvcii
* added new node states: undone, recover, panic
*
* Revision 1.20 1995/12/01 14:59:19 root
* increased MAX_ANTECEDENTS from 10 to 20
* should consider getting rid of this (eliminate static array)
*
* Revision 1.19 1995/11/30 15:58:59 wvcii
* added copyright info
*
* Revision 1.18 1995/11/19 16:27:03 wvcii
* created struct dagList
*
* Revision 1.17 1995/11/07 15:43:01 wvcii
* added static array to DAGnode: antType
* added commitNode type
* added commit node counts to dag header
* added ptr (firstDag) to support multi-dag requests
* added succedent done/fired counts to nodes to support rollback
* added node status type "skipped"
* added hdr status types "rollForward, rollBackward"
* deleted hdr status type "disable"
* updated ResetNode & ResetDAGHeader to zero new fields
*
*/
#ifndef _RF__RF_DAG_H_
#define _RF__RF_DAG_H_
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_alloclist.h"
#include "rf_stripelocks.h"
#include "rf_layout.h"
#include "rf_dagflags.h"
#include "rf_acctrace.h"
#include "rf_memchunk.h"
#define RF_THREAD_CONTEXT 0 /* we were invoked from thread context */
#define RF_INTR_CONTEXT 1 /* we were invoked from interrupt context */
#define RF_MAX_ANTECEDENTS 20 /* max num of antecedents a node may possess; sizes RF_DagNode_s.antType[] */
#ifdef KERNEL
#include <sys/buf.h>
#endif /* KERNEL */
/*
 * One result-to-parameter binding.  Several bindings are chained through
 * `next`; RF_DagNode_s.propList holds an array of such list heads.
 */
struct RF_PropHeader_s { /* structure for propagation of results */
int resultNum; /* bind result # resultNum */
int paramNum; /* to parameter # paramNum */
RF_PropHeader_t *next; /* linked list for multiple results/params */
};
/*
 * Execution state of a DAG node (the type of RF_DagNode_s.status).
 * RF_ResetNode() puts a node back into rf_wait.
 */
typedef enum RF_NodeStatus_e {
rf_bwd1, /* node is ready for undo logging (backward error recovery only) */
rf_bwd2, /* node has completed undo logging (backward error recovery only) */
rf_wait, /* node is waiting to be executed */
rf_fired, /* node is currently executing its do function */
rf_good, /* node successfully completed execution of its do function */
rf_bad, /* node failed to successfully execute its do function */
rf_skipped, /* not used anymore, used to imply a node was not executed */
rf_recover, /* node is currently executing its undo function */
rf_panic, /* node failed to successfully execute its undo function */
rf_undone /* node successfully executed its undo function */
} RF_NodeStatus_t;
/*
 * Kind of dependence a node has on one of its antecedents; one value per
 * antecedent is stored in RF_DagNode_s.antType[].
 *
 * These were used to control skipping a node.
 * Now, these are only used as comments.
 */
typedef enum RF_AntecedentType_e {
rf_trueData,
rf_antiData,
rf_outputData,
rf_control
} RF_AntecedentType_t;
/*
 * Sizes of the scratch arrays embedded in every RF_DagNode_s (dag_ptrs[]
 * and dag_params[] respectively).  Per the revision 1.23 log entry above,
 * the pointer cache is a "common case" size chosen so that pointer arrays
 * need not always be allocated separately.
 */
#define RF_DAG_PTRCACHESIZE 40
#define RF_DAG_PARAMCACHESIZE 12
/* type of RF_DagNode_s.flags; holds RF_DAGNODE_FLAG_* bit values */
typedef RF_uint8 RF_DagNodeFlags_t;
/*
 * One node of a DAG: the functions to run, its parameters and results,
 * its edges to antecedent/succedent nodes, and the bookkeeping counters
 * consulted while the graph executes.
 *
 * NOTE(review): several field comments below refer to "*funcPtr", but no
 * field of that name exists in this structure; presumably they mean the
 * node's doFunc -- confirm.
 */
struct RF_DagNode_s {
RF_NodeStatus_t status; /* current status of this node */
int (*doFunc)(RF_DagNode_t *); /* normal function */
int (*undoFunc)(RF_DagNode_t *); /* func to remove effect of doFunc */
int (*wakeFunc)(RF_DagNode_t *, int status); /* func called when the node completes an I/O */
int numParams; /* number of parameters required by *funcPtr */
int numResults; /* number of results produced by *funcPtr */
int numAntecedents; /* number of antecedents */
int numAntDone; /* number of antecedents which have finished */
int numSuccedents; /* number of succedents */
int numSuccFired; /* incremented when a succedent is fired during forward execution */
int numSuccDone; /* incremented when a succedent finishes during rollBackward */
int commitNode; /* boolean flag - if true, this is a commit node */
RF_DagNode_t **succedents; /* succedents, array size numSuccedents */
RF_DagNode_t **antecedents; /* antecedents, array size numAntecedents */
/* fixed-size array: presumably numAntecedents must not exceed RF_MAX_ANTECEDENTS -- TODO confirm */
RF_AntecedentType_t antType[RF_MAX_ANTECEDENTS]; /* type of each antecedent */
void **results; /* array of results produced by *funcPtr */
RF_DagParam_t *params; /* array of parameters required by *funcPtr */
RF_PropHeader_t **propList; /* propagation list, size numSuccedents */
RF_DagHeader_t *dagHdr; /* ptr to head of dag containing this node */
void *dagFuncData; /* dag execution func uses this for whatever it wants */
RF_DagNode_t *next; /* set to NULL by RF_ResetNode(); NOTE(review): which list this links is not shown here */
int nodeNum; /* used by PrintDAG for debug only */
int visited; /* used to avoid re-visiting nodes on DAG walks */
/* ANY CODE THAT USES THIS FIELD MUST MAINTAIN THE PROPERTY
* THAT AFTER IT FINISHES, ALL VISITED FLAGS IN THE DAG ARE IDENTICAL */
char *name; /* debug only */
RF_DagNodeFlags_t flags; /* RF_DAGNODE_FLAG_* bit values, see below */
RF_DagNode_t *dag_ptrs[RF_DAG_PTRCACHESIZE]; /* cache for performance */
RF_DagParam_t dag_params[RF_DAG_PARAMCACHESIZE]; /* cache for performance */
};
/*
 * Bit values for flags field of RF_DagNode_t
 * (the field has type RF_DagNodeFlags_t).
 */
#define RF_DAGNODE_FLAG_NONE 0x00
#define RF_DAGNODE_FLAG_YIELD 0x01 /* in the kernel, yield the processor before firing this node */
/*
 * Overall execution state of a DAG (the type of RF_DagHeader_s.status).
 * RF_ResetDagHeader() puts a header back into rf_enable.
 *
 * enable - DAG ready for normal execution, no errors encountered
 * rollForward - DAG encountered an error after commit point, rolling forward
 * rollBackward - DAG encountered an error prior to commit point, rolling backward
 */
typedef enum RF_DagStatus_e {
rf_enable,
rf_rollForward,
rf_rollBackward
} RF_DagStatus_t;
/* capacity of RF_DagHeader_s.succedents[] */
#define RF_MAX_HDR_SUCC 1
/* capacity of RF_DagHeader_s.memChunk[] */
#define RF_MAXCHUNKS 10
/*
 * Header of one DAG: execution status, commit accounting, the root
 * succedent(s), resources to release when the DAG is freed, and the
 * completion callback.
 */
struct RF_DagHeader_s {
RF_DagStatus_t status; /* status of this DAG */
/* NOTE(review): the comment below allows > 1 root, but succedents[] is sized RF_MAX_HDR_SUCC (1) */
int numSuccedents; /* DAG may be a tree, i.e. may have > 1 root */
int numCommitNodes; /* number of commit nodes in graph */
int numCommits; /* number of commit nodes which have been fired */
RF_DagNode_t *succedents[RF_MAX_HDR_SUCC]; /* array of succedents, size numSuccedents */
RF_DagHeader_t *next; /* ptr to allow a list of dags */
RF_AllocListElem_t *allocList; /* ptr to list of ptrs to be freed prior to freeing DAG */
RF_AccessStripeMapHeader_t *asmList; /* list of access stripe maps to be freed */
int nodeNum; /* used by PrintDAG for debug only */
int numNodesCompleted; /* zeroed by RF_ResetDagHeader() */
RF_AccTraceEntry_t *tracerec; /* perf mon only */
void (*cbFunc)(void *); /* function to call when the dag completes */
void *cbArg; /* argument for cbFunc */
char *creator; /* name of function used to create this dag */
RF_Raid_t *raidPtr; /* the descriptor for the RAID device this DAG is for */
void *bp; /* the bp for this I/O passed down from the file system. ignored outside kernel */
RF_ChunkDesc_t *memChunk[RF_MAXCHUNKS]; /* experimental- Chunks of memory to be retained upon DAG free for re-use */
int chunkIndex; /* the idea is to avoid calls to alloc and free */
RF_ChunkDesc_t **xtraMemChunk; /* escape hatch which allows SelectAlgorithm to merge memChunks from several dags */
int xtraChunkIndex; /* number of ptrs to valid chunks */
int xtraChunkCnt; /* number of ptrs to chunks allocated */
#ifdef SIMULATE
int done; /* Tag to tell if termination node has been fired */
#endif /* SIMULATE */
};
/*
 * Bookkeeping for a list of DAGs that are fired one after another.
 * Note that tracerec is embedded by value here, whereas RF_DagHeader_s
 * only holds a pointer to an RF_AccTraceEntry_t.
 */
struct RF_DagList_s {
/* common info for a list of dags which will be fired sequentially */
int numDags; /* number of dags in the list */
int numDagsFired; /* number of dags in list which have initiated execution */
int numDagsDone; /* number of dags in list which have completed execution */
RF_DagHeader_t *dags; /* list of dags */
RF_RaidAccessDesc_t *desc; /* ptr to descriptor for this access */
RF_AccTraceEntry_t tracerec; /* perf mon info for dags (not user info) */
};
/*
 * Resets a node so that it can be fired again: status goes back to
 * rf_wait, the antecedent/succedent progress counters are zeroed, and the
 * `next` link is cleared.
 *
 * _n_ is a pointer to the node (RF_DagNode_t *).  It is expanded several
 * times, so do not pass an expression with side effects.
 *
 * The body is wrapped in do { } while (0) so that "RF_ResetNode(n);" is
 * exactly one statement.  With a bare { } block the trailing semicolon
 * became a separate empty statement, which breaks
 *     if (c) RF_ResetNode(n); else ...
 * Invocations must be followed by a semicolon.
 */
#define RF_ResetNode(_n_) do { \
	(_n_)->status = rf_wait; \
	(_n_)->numAntDone = 0; \
	(_n_)->numSuccFired = 0; \
	(_n_)->numSuccDone = 0; \
	(_n_)->next = NULL; \
} while (0)
/*
 * Resets a DAG header so the DAG can be executed again: the completed-node
 * and fired-commit counters are zeroed and status returns to rf_enable.
 * Under SIMULATE the `done` tag is cleared as well.
 *
 * _h_ is a pointer to the header (RF_DagHeader_t *).  It is expanded
 * several times, so do not pass an expression with side effects.
 *
 * The body is wrapped in do { } while (0) so that "RF_ResetDagHeader(h);"
 * is exactly one statement and can sit in an unbraced if/else arm.
 * Invocations must be followed by a semicolon.
 */
#ifdef SIMULATE
#define RF_ResetDagHeader(_h_) do { \
	(_h_)->done = RF_FALSE; \
	(_h_)->numNodesCompleted = 0; \
	(_h_)->numCommits = 0; \
	(_h_)->status = rf_enable; \
} while (0)
#else /* SIMULATE */
#define RF_ResetDagHeader(_h_) do { \
	(_h_)->numNodesCompleted = 0; \
	(_h_)->numCommits = 0; \
	(_h_)->status = rf_enable; \
} while (0)
#endif /* SIMULATE */
/*
 * Convenience macro for declaring a create-dag function.
 * Expands to the head of a function named _name_ that returns void and
 * takes the standard six DAG-creation parameters (raidPtr, asmap, dag_h,
 * bp, flags, allocList).  It supplies neither a storage class nor a
 * trailing semicolon, so it can be prefixed with `static` and followed
 * directly by a function body.
 */
#define RF_CREATE_DAG_FUNC_DECL(_name_) \
void _name_ ( \
RF_Raid_t *raidPtr, \
RF_AccessStripeMap_t *asmap, \
RF_DagHeader_t *dag_h, \
void *bp, \
RF_RaidAccessFlags_t flags, \
RF_AllocListElem_t *allocList)
#endif /* !_RF__RF_DAG_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
/* $NetBSD: rf_dagdegrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_dagdegrd.h,v
* Revision 1.6 1996/07/31 16:29:06 jimz
* asm/asmap re-fix (EO merge)
*
* Revision 1.5 1996/07/31 15:34:40 jimz
* evenodd changes; bugfixes for double-degraded archs, generalize
* some formerly PQ-only functions
*
* Revision 1.4 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.3 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/03 19:22:06 wvcii
* Initial revision
*
*/
#ifndef _RF__RF_DAGDEGRD_H_
#define _RF__RF_DAGDEGRD_H_
#include "rf_types.h"
/*
 * degraded read DAG creation routines
 *
 * The first three routines that take exactly (raidPtr, asmap, dag_h, bp,
 * flags, allocList) share the standard DAG-creation parameter list.
 * rf_CreateDegradedReadDAG additionally takes the redundancy functions to
 * use (recFunc), and rf_DoubleDegRead takes node names plus the recovery
 * function.
 * NOTE(review): parameter semantics are not visible from this header;
 * see rf_dagdegrd.c.
 */
void rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
void rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
void rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
RF_RedFuncs_t *recFunc);
void rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
/* outputs are returned through pdap/nNodep and pqpdap/nPQNodep */
void rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap,
int *nNodep, RF_PhysDiskAddr_t **pqpdap, int *nPQNodep,
RF_AllocListElem_t *allocList);
void rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList, char *redundantReadNodeName,
char *recoveryNodeName, int (*recovFunc)(RF_DagNode_t *));
#endif /* !_RF__RF_DAGDEGRD_H_ */

View File

@ -0,0 +1,968 @@
/* $NetBSD: rf_dagdegwr.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_dagdegwr.c
*
* code for creating degraded write DAGs
*
* :
* Log: rf_dagdegwr.c,v
* Revision 1.23 1996/11/05 21:10:40 jimz
* failed pda generalization
*
* Revision 1.22 1996/08/23 14:49:48 jimz
* remove bogus assert from small write double deg DAG generator
*
* Revision 1.21 1996/08/21 05:09:44 jimz
* get rid of bogus fakery in DoubleDegSmallWrite
*
* Revision 1.20 1996/08/21 04:14:35 jimz
* cleanup doubledegsmallwrite
* NOTE: we need doubledeglargewrite
*
* Revision 1.19 1996/08/19 21:39:38 jimz
* CommonCreateSimpleDegradedWriteDAG() was unable to correctly create DAGs for
* complete stripe overwrite accesses- it assumed the necessity to read old
* data. Rather than do the "right" thing, and risk breaking a critical DAG so
* close to release, I made a no-op read node to stick in and link up in this
* case. Seems to work.
*
* Revision 1.18 1996/07/31 15:35:34 jimz
* evenodd changes; bugfixes for double-degraded archs, generalize
* some formerly PQ-only functions
*
* Revision 1.17 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.16 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.15 1996/07/27 16:30:19 jimz
* cleanup sweep
*
* Revision 1.14 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.13 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.12 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.11 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.10 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.9 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.8 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.7 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.6 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.5 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.4 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.1 1996/05/03 19:21:50 wvcii
* Initial revision
*
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_debugMem.h"
#include "rf_memchunk.h"
#include "rf_general.h"
#include "rf_dagdegwr.h"
#include "rf_sys.h"
/******************************************************************************
*
* General comments on DAG creation:
*
* All DAGs in this file use roll-away error recovery. Each DAG has a single
* commit node, usually called "Cmt." If an error occurs before the Cmt node
* is reached, the execution engine will halt forward execution and work
* backward through the graph, executing the undo functions. Assuming that
* each node in the graph prior to the Cmt node is undoable and atomic - or -
* does not make changes to permanent state, the graph will fail atomically.
* If an error occurs after the Cmt node executes, the engine will roll-forward
* through the graph, blindly executing nodes until it reaches the end.
* If a graph reaches the end, it is assumed to have completed successfully.
*
* A graph has only 1 Cmt node.
*
*/
/******************************************************************************
*
* The following wrappers map the standard DAG creation interface to the
* DAG creation routines. Additionally, these wrappers enable experimentation
* with new DAG structures by providing an extra level of indirection, allowing
* the DAG creation routines to be replaced at this single point.
*/
/*
 * Adapter from the standard six-argument DAG-creation signature to
 * rf_CommonCreateSimpleDegradedWriteDAG(): one redundancy unit
 * (nfaults == 1), rf_RecoveryXorFunc as the redundancy function, and
 * buffer recycling allowed.
 */
static RF_CREATE_DAG_FUNC_DECL(rf_CreateSimpleDegradedWriteDAG)
{
	int faultCount = 1;		/* nfaults argument */
	int recycleBuffers = RF_TRUE;	/* allowBufferRecycle argument */

	rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
	    allocList, faultCount, rf_RecoveryXorFunc, recycleBuffers);
}
/*
 * rf_CreateDegradedWriteDAG -- build the DAG for a write in which exactly
 * one data unit has failed (asserted via asmap->numDataFailed == 1).
 *
 * Takes the standard DAG-creation parameters, records its name in
 * dag_h->creator, and hands all of them unchanged to
 * rf_CreateSimpleDegradedWriteDAG().
 */
void
rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t *allocList)
{
	RF_RaidLayout_t *layoutPtr;
	RF_PhysDiskAddr_t *failedPDA;

	layoutPtr = &(raidPtr->Layout);
	/* the pda within the access that targets the failed disk */
	failedPDA = asmap->failedPDAs[0];

	RF_ASSERT( asmap->numDataFailed == 1 );
	dag_h->creator = "DegradedWriteDAG";

	/*
	 * The access must either touch exactly one stripe unit or cover the
	 * failed unit in full (numSector == sectorsPerStripeUnit).  The
	 * remaining case -- writing only part of the failed unit together
	 * with part of at least one surviving unit -- was originally meant
	 * to be handled by building two DAGs (one for the failed component,
	 * one for the rest) and running them sequentially.  No such split is
	 * performed in this function; the assertion below rejects that case.
	 * NOTE(review): presumably the caller avoids it -- confirm.
	 */
	RF_ASSERT(!(asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit));

	rf_CreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList);
}
/******************************************************************************
*
* DAG creation code begins here
*/
/******************************************************************************
*
* CommonCreateSimpleDegradedWriteDAG -- creates a DAG to do a degraded-mode
* write, which is as follows
*
* / {Wnq} --\
* hdr -> blockNode -> Rod -> Xor -> Cmt -> Wnp ----> unblock -> term
* \ {Rod} / \ Wnd ---/
* \ {Wnd} -/
*
* commit nodes: Xor, Wnd
*
* IMPORTANT:
* This DAG generator does not work for double-degraded archs since it does not
* generate Q
*
* This dag is essentially identical to the large-write dag, except that the
* write to the failed data unit is suppressed.
*
* IMPORTANT: this dag does not work in the case where the access writes only
* a portion of the failed unit, and also writes some portion of at least one
* surviving SU. this case is handled in CreateDegradedWriteDAG above.
*
* The block & unblock nodes are leftovers from a previous version. They
* do nothing, but I haven't deleted them because it would be a tremendous
* effort to put them back in.
*
* This dag is used whenever one of the data units in a write has failed.
* If it is the parity unit that failed, the nonredundant write dag (below)
* is used.
*****************************************************************************/
void rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
allocList, nfaults, redFunc, allowBufferRecycle)
RF_Raid_t *raidPtr;
RF_AccessStripeMap_t *asmap;
RF_DagHeader_t *dag_h;
void *bp;
RF_RaidAccessFlags_t flags;
RF_AllocListElem_t *allocList;
int nfaults;
int (*redFunc)(RF_DagNode_t *);
int allowBufferRecycle;
{
int nNodes, nRrdNodes, nWndNodes, nXorBufs, i, j, paramNum, rdnodesFaked;
RF_DagNode_t *blockNode, *unblockNode, *wnpNode, *wnqNode, *termNode;
RF_DagNode_t *nodes, *wndNodes, *rrdNodes, *xorNode, *commitNode;
RF_SectorCount_t sectorsPerSU;
RF_ReconUnitNum_t which_ru;
char *xorTargetBuf = NULL; /* the target buffer for the XOR operation */
char *overlappingPDAs; /* a temporary array of flags */
RF_AccessStripeMapHeader_t *new_asm_h[2];
RF_PhysDiskAddr_t *pda, *parityPDA;
RF_StripeNum_t parityStripeID;
RF_PhysDiskAddr_t *failedPDA;
RF_RaidLayout_t *layoutPtr;
layoutPtr = &(raidPtr->Layout);
parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress,
&which_ru);
sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
/* failedPDA points to the pda within the asm that targets the failed disk */
failedPDA = asmap->failedPDAs[0];
if (rf_dagDebug)
printf("[Creating degraded-write DAG]\n");
RF_ASSERT( asmap->numDataFailed == 1 );
dag_h->creator = "SimpleDegradedWriteDAG";
/*
* Generate two ASMs identifying the surviving data
* we need in order to recover the lost data.
*/
/* overlappingPDAs array must be zero'd */
RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed, sizeof(char), (char *));
rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h,
&nXorBufs, NULL, overlappingPDAs, allocList);
/* create all the nodes at once */
nWndNodes = asmap->numStripeUnitsAccessed - 1; /* no access is generated
* for the failed pda */
nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
/*
* XXX
*
* There's a bug with a complete stripe overwrite- that means 0 reads
* of old data, and the rest of the DAG generation code doesn't like
* that. A release is coming, and I don't wanna risk breaking a critical
* DAG generator, so here's what I'm gonna do- if there's no read nodes,
* I'm gonna fake there being a read node, and I'm gonna swap in a
* no-op node in its place (to make all the link-up code happy).
* This should be fixed at some point. --jimz
*/
if (nRrdNodes == 0) {
nRrdNodes = 1;
rdnodesFaked = 1;
}
else {
rdnodesFaked = 0;
}
/* lock, unlock, xor, Wnd, Rrd, W(nfaults) */
nNodes = 5 + nfaults + nWndNodes + nRrdNodes;
RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t),
(RF_DagNode_t *), allocList);
i = 0;
blockNode = &nodes[i]; i += 1;
commitNode = &nodes[i]; i += 1;
unblockNode = &nodes[i]; i += 1;
termNode = &nodes[i]; i += 1;
xorNode = &nodes[i]; i += 1;
wnpNode = &nodes[i]; i += 1;
wndNodes = &nodes[i]; i += nWndNodes;
rrdNodes = &nodes[i]; i += nRrdNodes;
if (nfaults == 2) {
wnqNode = &nodes[i]; i += 1;
}
else {
wnqNode = NULL;
}
RF_ASSERT(i == nNodes);
/* this dag can not commit until all rrd and xor Nodes have completed */
dag_h->numCommitNodes = 1;
dag_h->numCommits = 0;
dag_h->numSuccedents = 1;
RF_ASSERT( nRrdNodes > 0 );
rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
NULL, nRrdNodes, 0, 0, 0, dag_h, "Nil", allocList);
rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
NULL, nWndNodes + nfaults, 1, 0, 0, dag_h, "Cmt", allocList);
rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
NULL, 1, nWndNodes + nfaults, 0, 0, dag_h, "Nil", allocList);
rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, rf_NullNodeUndoFunc, NULL, 1,
nRrdNodes, 2*nXorBufs+2, nfaults, dag_h, "Xrc", allocList);
/*
* Fill in the Rrd nodes. If any of the rrd buffers are the same size as
* the failed buffer, save a pointer to it so we can use it as the target
* of the XOR. The pdas in the rrd nodes have been range-restricted, so if
* a buffer is the same size as the failed buffer, it must also be at the
* same alignment within the SU.
*/
i = 0;
if (new_asm_h[0]) {
for (i=0, pda=new_asm_h[0]->stripeMap->physInfo;
i<new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
i++, pda=pda->next)
{
rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
RF_ASSERT(pda);
rrdNodes[i].params[0].p = pda;
rrdNodes[i].params[1].p = pda->bufPtr;
rrdNodes[i].params[2].v = parityStripeID;
rrdNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
}
}
/* i now equals the number of stripe units accessed in new_asm_h[0] */
if (new_asm_h[1]) {
for (j=0,pda=new_asm_h[1]->stripeMap->physInfo;
j<new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
j++, pda=pda->next)
{
rf_InitNode(&rrdNodes[i+j], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rrd", allocList);
RF_ASSERT(pda);
rrdNodes[i+j].params[0].p = pda;
rrdNodes[i+j].params[1].p = pda->bufPtr;
rrdNodes[i+j].params[2].v = parityStripeID;
rrdNodes[i+j].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
if (allowBufferRecycle && (pda->numSector == failedPDA->numSector))
xorTargetBuf = pda->bufPtr;
}
}
if (rdnodesFaked) {
/*
* This is where we'll init that fake noop read node
* (XXX should the wakeup func be different?)
*/
rf_InitNode(&rrdNodes[0], rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
NULL, 1, 1, 0, 0, dag_h, "RrN", allocList);
}
/*
* Make a PDA for the parity unit. The parity PDA should start at
* the same offset into the SU as the failed PDA.
*/
/*
* Danner comment:
* I don't think this copy is really necessary.
* We are in one of two cases here.
* (1) The entire failed unit is written. Then asmap->parityInfo will
* describe the entire parity.
* (2) We are only writing a subset of the failed unit and nothing
* else. Then the asmap->parityInfo describes the failed unit and
* the copy can also be avoided.
*/
RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
parityPDA->row = asmap->parityInfo->row;
parityPDA->col = asmap->parityInfo->col;
parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
* sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
parityPDA->numSector = failedPDA->numSector;
if (!xorTargetBuf) {
RF_CallocAndAdd(xorTargetBuf, 1,
rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
}
/* init the Wnp node */
rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnp", allocList);
wnpNode->params[0].p = parityPDA;
wnpNode->params[1].p = xorTargetBuf;
wnpNode->params[2].v = parityStripeID;
wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
/* fill in the Wnq Node */
if (nfaults == 2) {
{
RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
(RF_PhysDiskAddr_t *), allocList);
parityPDA->row = asmap->qInfo->row;
parityPDA->col = asmap->qInfo->col;
parityPDA->startSector = ((asmap->qInfo->startSector / sectorsPerSU)
* sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
parityPDA->numSector = failedPDA->numSector;
rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnq", allocList);
wnqNode->params[0].p = parityPDA;
RF_CallocAndAdd(xorNode->results[1], 1,
rf_RaidAddressToByte(raidPtr, failedPDA->numSector), (char *), allocList);
wnqNode->params[1].p = xorNode->results[1];
wnqNode->params[2].v = parityStripeID;
wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
}
}
/* fill in the Wnd nodes */
for (pda=asmap->physInfo, i=0; i<nWndNodes; i++, pda=pda->next) {
if (pda == failedPDA) {
i--;
continue;
}
rf_InitNode(&wndNodes[i], rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
RF_ASSERT(pda);
wndNodes[i].params[0].p = pda;
wndNodes[i].params[1].p = pda->bufPtr;
wndNodes[i].params[2].v = parityStripeID;
wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
}
/* fill in the results of the xor node */
xorNode->results[0] = xorTargetBuf;
/* fill in the params of the xor node */
paramNum=0;
if (rdnodesFaked == 0) {
for (i=0; i<nRrdNodes; i++) {
/* all the Rrd nodes need to be xored together */
xorNode->params[paramNum++] = rrdNodes[i].params[0];
xorNode->params[paramNum++] = rrdNodes[i].params[1];
}
}
for (i=0; i < nWndNodes; i++) {
/* any Wnd nodes that overlap the failed access need to be xored in */
if (overlappingPDAs[i]) {
RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
bcopy((char *)wndNodes[i].params[0].p, (char *)pda, sizeof(RF_PhysDiskAddr_t));
rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
xorNode->params[paramNum++].p = pda;
xorNode->params[paramNum++].p = pda->bufPtr;
}
}
RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
/*
* Install the failed PDA into the xor param list so that the
* new data gets xor'd in.
*/
xorNode->params[paramNum++].p = failedPDA;
xorNode->params[paramNum++].p = failedPDA->bufPtr;
/*
* The last 2 params to the recovery xor node are always the failed
* PDA and the raidPtr. install the failedPDA even though we have just
* done so above. This allows us to use the same XOR function for both
* degraded reads and degraded writes.
*/
xorNode->params[paramNum++].p = failedPDA;
xorNode->params[paramNum++].p = raidPtr;
RF_ASSERT( paramNum == 2*nXorBufs+2 );
/*
* Code to link nodes begins here
*/
/* link header to block node */
RF_ASSERT(blockNode->numAntecedents == 0);
dag_h->succedents[0] = blockNode;
/* link block node to rd nodes */
RF_ASSERT(blockNode->numSuccedents == nRrdNodes);
for (i = 0; i < nRrdNodes; i++) {
RF_ASSERT(rrdNodes[i].numAntecedents == 1);
blockNode->succedents[i] = &rrdNodes[i];
rrdNodes[i].antecedents[0] = blockNode;
rrdNodes[i].antType[0] = rf_control;
}
/* link read nodes to xor node*/
RF_ASSERT(xorNode->numAntecedents == nRrdNodes);
for (i = 0; i < nRrdNodes; i++) {
RF_ASSERT(rrdNodes[i].numSuccedents == 1);
rrdNodes[i].succedents[0] = xorNode;
xorNode->antecedents[i] = &rrdNodes[i];
xorNode->antType[i] = rf_trueData;
}
/* link xor node to commit node */
RF_ASSERT(xorNode->numSuccedents == 1);
RF_ASSERT(commitNode->numAntecedents == 1);
xorNode->succedents[0] = commitNode;
commitNode->antecedents[0] = xorNode;
commitNode->antType[0] = rf_control;
/* link commit node to wnd nodes */
RF_ASSERT(commitNode->numSuccedents == nfaults + nWndNodes);
for (i = 0; i < nWndNodes; i++) {
RF_ASSERT(wndNodes[i].numAntecedents == 1);
commitNode->succedents[i] = &wndNodes[i];
wndNodes[i].antecedents[0] = commitNode;
wndNodes[i].antType[0] = rf_control;
}
/* link the commit node to wnp, wnq nodes */
RF_ASSERT(wnpNode->numAntecedents == 1);
commitNode->succedents[nWndNodes] = wnpNode;
wnpNode->antecedents[0] = commitNode;
wnpNode->antType[0] = rf_control;
if (nfaults == 2) {
RF_ASSERT(wnqNode->numAntecedents == 1);
commitNode->succedents[nWndNodes + 1] = wnqNode;
wnqNode->antecedents[0] = commitNode;
wnqNode->antType[0] = rf_control;
}
/* link write new data nodes to unblock node */
RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nfaults));
for(i = 0; i < nWndNodes; i++) {
RF_ASSERT(wndNodes[i].numSuccedents == 1);
wndNodes[i].succedents[0] = unblockNode;
unblockNode->antecedents[i] = &wndNodes[i];
unblockNode->antType[i] = rf_control;
}
/* link write new parity node to unblock node */
RF_ASSERT(wnpNode->numSuccedents == 1);
wnpNode->succedents[0] = unblockNode;
unblockNode->antecedents[nWndNodes] = wnpNode;
unblockNode->antType[nWndNodes] = rf_control;
/* link write new q node to unblock node */
if (nfaults == 2) {
RF_ASSERT(wnqNode->numSuccedents == 1);
wnqNode->succedents[0] = unblockNode;
unblockNode->antecedents[nWndNodes+1] = wnqNode;
unblockNode->antType[nWndNodes+1] = rf_control;
}
/* link unblock node to term node */
RF_ASSERT(unblockNode->numSuccedents == 1);
RF_ASSERT(termNode->numAntecedents == 1);
RF_ASSERT(termNode->numSuccedents == 0);
unblockNode->succedents[0] = termNode;
termNode->antecedents[0] = unblockNode;
termNode->antType[0] = rf_control;
}
/*
 * CONS_PDA(info, start, num)
 *
 * Fill in the PDA that the local variable pda_p currently points at so that
 * it describes a redundancy unit of this stripe:
 *   info  - name of the asmap member to copy row/col from (parityInfo or
 *           qInfo in the callers below)
 *   start - sector offset within the stripe unit
 *   num   - number of sectors (evaluated twice)
 * A buffer of rf_RaidAddressToByte(raidPtr, num) bytes is allocated for the
 * PDA and registered on allocList.
 *
 * The macro relies on the locals pda_p, asmap, secPerSU, raidPtr and
 * allocList being in scope at the point of use.  It is wrapped in
 * do { } while (0) so that it behaves as a single statement (safe under an
 * unbraced if/else), and its value arguments are parenthesized.
 */
#define CONS_PDA(info,start,num) \
  do { \
    pda_p->row = asmap->info->row; \
    pda_p->col = asmap->info->col; \
    pda_p->startSector = ((asmap->info->startSector / secPerSU) * secPerSU) + (start); \
    pda_p->numSector = (num); \
    pda_p->next = NULL; \
    RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, (num)), (char *), allocList); \
  } while (0)
/*
 * rf_WriteGenerateFailedAccessASMs
 *
 * Generates the physical disk addresses (PDAs) that a write to a stripe with
 * one or two failed data units needs in addition to the user's own PDAs.
 *
 * Inputs:
 *   raidPtr   - array description (layout, disk status, sector mapping)
 *   asmap     - the access; failedPDAs[0] (and failedPDAs[1] when
 *               numDataFailed != 1) describe the failed data units
 *   allocList - every allocation made here is registered on this list
 *
 * Outputs:
 *   *pqpdap   - array of redundancy PDAs, each with a freshly allocated
 *               buffer.  Layout is P,Q for states 1 and 2, and
 *               P,Q (fone range) followed by P,Q (ftwo range) for state 3.
 *   *nPQNodep - number of P PDAs (equal to the number of Q PDAs): 1 or 2.
 *   *nNodep   - number of data PDAs generated:
 *               PDAPerDisk * (numDataCol - 2).
 *   *pdap     - zero-initialized array of *nNodep data PDAs chained through
 *               ->next, filled in by walking the data columns of the stripe
 *               and skipping dead disks; each gets its own buffer.
 *               Set to NULL when *nNodep is 0.
 *
 * The three internal states are:
 *   1 - one failed data unit: one PDA per disk covering the failed range
 *   2 - two failed units whose sector counts sum to more than a stripe
 *       unit: one PDA per disk covering the whole stripe unit
 *   3 - two failed units otherwise: two PDAs per disk, one per failed range
 */
void rf_WriteGenerateFailedAccessASMs(
  RF_Raid_t *raidPtr,
  RF_AccessStripeMap_t *asmap,
  RF_PhysDiskAddr_t **pdap,
  int *nNodep,
  RF_PhysDiskAddr_t **pqpdap,
  int *nPQNodep,
  RF_AllocListElem_t *allocList)
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  int PDAPerDisk,i;
  RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  int numDataCol = layoutPtr->numDataCol;
  int state;
  unsigned napdas;
  RF_SectorNum_t fone_start, ftwo_start = 0;
  RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
  RF_PhysDiskAddr_t *pda_p;
  RF_RaidAddr_t sosAddr;

  /* determine how many pda's we will have to generate per unaccessed stripe
     unit.  If there is only one failed data unit, it is one; if two,
     possibly two, depending on whether they overlap. */
  fone_start = rf_StripeUnitOffset(layoutPtr,fone->startSector);
  if (asmap->numDataFailed==1)
  {
    PDAPerDisk = 1;
    state = 1;
    RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
    pda_p = *pqpdap;
    /* build p */
    CONS_PDA(parityInfo,fone_start,fone->numSector);
    pda_p->type = RF_PDA_TYPE_PARITY;
    pda_p++;
    /* build q */
    CONS_PDA(qInfo,fone_start,fone->numSector);
    pda_p->type = RF_PDA_TYPE_Q;
  }
  else
  {
    ftwo_start = rf_StripeUnitOffset(layoutPtr,ftwo->startSector);
    if (fone->numSector + ftwo->numSector > secPerSU)
    {
      /* the two failed ranges are treated as one full stripe unit */
      PDAPerDisk = 1;
      state = 2;
      RF_MallocAndAdd(*pqpdap,2*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
      pda_p = *pqpdap;
      CONS_PDA(parityInfo,0,secPerSU);
      pda_p->type = RF_PDA_TYPE_PARITY;
      pda_p++;
      CONS_PDA(qInfo,0,secPerSU);
      pda_p->type = RF_PDA_TYPE_Q;
    }
    else
    {
      PDAPerDisk = 2;
      state = 3;
      /* four of them, fone, then ftwo */
      RF_MallocAndAdd(*pqpdap,4*sizeof(RF_PhysDiskAddr_t),(RF_PhysDiskAddr_t *), allocList);
      pda_p = *pqpdap;
      CONS_PDA(parityInfo,fone_start,fone->numSector);
      pda_p->type = RF_PDA_TYPE_PARITY;
      pda_p++;
      CONS_PDA(qInfo,fone_start,fone->numSector);
      pda_p->type = RF_PDA_TYPE_Q;
      pda_p++;
      CONS_PDA(parityInfo,ftwo_start,ftwo->numSector);
      pda_p->type = RF_PDA_TYPE_PARITY;
      pda_p++;
      CONS_PDA(qInfo,ftwo_start,ftwo->numSector);
      pda_p->type = RF_PDA_TYPE_Q;
    }
  }

  /* figure out number of nonaccessed pda */
  napdas = PDAPerDisk * (numDataCol - 2);
  *nPQNodep = PDAPerDisk;
  *nNodep = napdas;
  if (napdas == 0) {
    /* short circuit: give the caller a defined (empty) list rather than
       leaving *pdap untouched */
    *pdap = NULL;
    return;
  }

  /* allocate up our list of pda's */
  RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
  *pdap = pda_p;

  /* link them together; the last entry keeps the NULL next from calloc */
  for (i=0; i < (napdas-1); i++)
    pda_p[i].next = pda_p+(i+1);

  /* walk the data columns starting at the stripe boundary */
  sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
  for (i=0; i < numDataCol; i++)
  {
    /* all slots already filled: nothing more to generate */
    if ((pda_p - (*pdap)) == napdas)
      continue;
    pda_p->type = RF_PDA_TYPE_DATA;
    pda_p->raidAddress = sosAddr + (i * secPerSU);
    (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
    /* skip over dead disks (the slot is reused for the next column) */
    if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
      continue;
    switch (state)
    {
    case 1: /* fone */
      pda_p->numSector = fone->numSector;
      pda_p->raidAddress += fone_start;
      pda_p->startSector += fone_start;
      RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
      break;
    case 2: /* full stripe */
      pda_p->numSector = secPerSU;
      RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,secPerSU), (char *), allocList);
      break;
    case 3: /* two slabs */
      pda_p->numSector = fone->numSector;
      pda_p->raidAddress += fone_start;
      pda_p->startSector += fone_start;
      RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
      /* second PDA on the same disk, covering the ftwo range */
      pda_p++;
      pda_p->type = RF_PDA_TYPE_DATA;
      pda_p->raidAddress = sosAddr + (i * secPerSU);
      (raidPtr->Layout.map->MapSector)(raidPtr,pda_p->raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
      pda_p->numSector = ftwo->numSector;
      pda_p->raidAddress += ftwo_start;
      pda_p->startSector += ftwo_start;
      RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr,pda_p->numSector), (char *), allocList);
      break;
    default:
      RF_PANIC();
    }
    pda_p++;
  }
  RF_ASSERT (pda_p - *pdap == napdas);
  return;
}
/* The PDA of a disk read/write node is stored in its first parameter. */
#define DISK_NODE_PDA(node) ((node)->params[0].p)

/*
 * DISK_NODE_PARAMS(node, pda)
 *
 * Fill in the four parameters of a disk node (passed as an lvalue, not a
 * pointer): the PDA, its buffer, the parity stripe ID, and the packed
 * priority / reconstruction-unit word.  Relies on the locals parityStripeID
 * and which_ru being in scope at the point of use.  _p_ is evaluated twice.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement, and
 * every use of _p_ is parenthesized.
 */
#define DISK_NODE_PARAMS(_node_,_p_) \
  do { \
    (_node_).params[0].p = (_p_); \
    (_node_).params[1].p = (_p_)->bufPtr; \
    (_node_).params[2].v = parityStripeID; \
    (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); \
  } while (0)
/*
 * rf_DoubleDegSmallWrite
 *
 * Builds the DAG for a small write to a stripe with a failed data unit in a
 * two-redundancy (P + Q style) array.  The first half of the graph looks
 * like a reconstruct-read (read surviving data plus P and Q), a single
 * recovery node then computes the new redundancy, and the second half writes
 * the user data and the new P and Q.
 *
 * Parameters:
 *   raidPtr, asmap          - array and access description
 *   dag_h                   - DAG header to fill in
 *   bp, flags               - not referenced by this routine
 *   allocList               - all allocations are registered here
 *   redundantReadNodeName   - name given to the "read Q" nodes
 *   redundantWriteNodeName  - currently not referenced; the "write Q" nodes
 *                             are always named "Wq"
 *                             NOTE(review): presumably this was meant to name
 *                             the write-Q nodes -- confirm against callers
 *   recoveryNodeName        - name given to the recovery node
 *   recovFunc               - function executed by the recovery node
 *
 * The node array is laid out contiguously as
 *   block, unblock, term, recovery, Rrd[], Rp[], Rq[], Wud[], Wp[], Wq[]
 * and several loops below depend on that ordering (e.g. "rrdNodes+i" for
 * i < nReadNodes walks Rrd, Rp and Rq; "wudNodes+i" for i < nWriteNodes
 * walks Wud, Wp and Wq).
 */
void rf_DoubleDegSmallWrite(
  RF_Raid_t *raidPtr,
  RF_AccessStripeMap_t *asmap,
  RF_DagHeader_t *dag_h,
  void *bp,
  RF_RaidAccessFlags_t flags,
  RF_AllocListElem_t *allocList,
  char *redundantReadNodeName,
  char *redundantWriteNodeName,
  char *recoveryNodeName,
  int (*recovFunc)(RF_DagNode_t *))
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  RF_DagNode_t *nodes, *wudNodes, *rrdNodes, *recoveryNode, *blockNode, *unblockNode, *rpNodes,*rqNodes, *wpNodes, *wqNodes, *termNode;
  /*
   * The two PDA lists are out-parameters of
   * rf_WriteGenerateFailedAccessASMs.  They start out NULL so that npdas
   * has a defined value even if the generator returns early without
   * storing a data PDA list (it is read unconditionally below).
   */
  RF_PhysDiskAddr_t *pda, *pqPDAs = NULL;
  RF_PhysDiskAddr_t *npdas = NULL;
  int nWriteNodes, nNodes, nReadNodes, nRrdNodes, nWudNodes, i;
  RF_ReconUnitNum_t which_ru;
  int nPQNodes;
  RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);

  /* simple small write case -
     First part looks like a reconstruct-read of the failed data units.
     Then a write of all data units not failed. */
  /*
                   Hdr
                    |
              ------Block-
             /    /       \
           Rrd  Rrd ... Rrd  Rp  Rq
             \    \       /
              -------PQ-----
             /       \      \
           Wud       Wp     WQ
             \       |      /
              --Unblock-
                    |
                    T
     Rrd = read recovery data (potentially none)
     Wud = write user data (not incl. failed disks)
     Wp = Write P (could be two)
     Wq = Write Q (could be two)
  */
  rf_WriteGenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes,allocList);
  RF_ASSERT(asmap->numDataFailed == 1);

  nWudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
  nReadNodes = nRrdNodes + 2*nPQNodes;
  nWriteNodes = nWudNodes+ 2*nPQNodes;
  nNodes = 4 + nReadNodes + nWriteNodes;   /* + block, unblock, term, recovery */

  RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
  blockNode = nodes;
  unblockNode = blockNode+1;
  termNode = unblockNode+1;
  recoveryNode = termNode+1;
  rrdNodes = recoveryNode+1;
  rpNodes = rrdNodes + nRrdNodes;
  rqNodes = rpNodes + nPQNodes;
  wudNodes = rqNodes + nPQNodes;
  wpNodes = wudNodes + nWudNodes;
  wqNodes = wpNodes + nPQNodes;

  dag_h->creator = "PQ_DDSimpleSmallWrite";
  dag_h->numSuccedents = 1;
  dag_h->succedents[0] = blockNode;
  rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  termNode->antecedents[0] = unblockNode;
  termNode->antType[0] = rf_control;

  /* init the block and unblock nodes */
  /* The block node has all the read nodes as successors */
  rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
  for (i=0; i < nReadNodes; i++)
    blockNode->succedents[i] = rrdNodes+i;

  /* The unblock node has all the writes as antecedents */
  rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWriteNodes, 0, 0, dag_h, "Nil", allocList);
  for (i=0; i < nWriteNodes; i++) {
    unblockNode->antecedents[i] = wudNodes+i;
    unblockNode->antType[i] = rf_control;
  }
  unblockNode->succedents[0] = termNode;

  /* every read node sits between the block node and the recovery node */
#define INIT_READ_NODE(node,name) \
  rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
  (node)->succedents[0] = recoveryNode; \
  (node)->antecedents[0] = blockNode; \
  (node)->antType[0] = rf_control;

  /* build the read nodes */
  pda = npdas;
  for (i=0; i < nRrdNodes; i++, pda = pda->next) {
    INIT_READ_NODE(rrdNodes+i,"rrd");
    DISK_NODE_PARAMS(rrdNodes[i],pda);
  }

  /* read redundancy pdas: pqPDAs is laid out P,Q[,P,Q] */
  pda = pqPDAs;
  INIT_READ_NODE(rpNodes,"Rp");
  RF_ASSERT(pda);
  DISK_NODE_PARAMS(rpNodes[0],pda);
  pda++;
  INIT_READ_NODE(rqNodes, redundantReadNodeName );
  RF_ASSERT(pda);
  DISK_NODE_PARAMS(rqNodes[0],pda);
  if (nPQNodes==2)
  {
    pda++;
    INIT_READ_NODE(rpNodes+1,"Rp");
    RF_ASSERT(pda);
    DISK_NODE_PARAMS(rpNodes[1],pda);
    pda++;
    INIT_READ_NODE(rqNodes+1,redundantReadNodeName );
    RF_ASSERT(pda);
    DISK_NODE_PARAMS(rqNodes[1],pda);
  }

  /* the recovery node has all reads as predecessors and all writes as successors.
     It generates a result for every write P or write Q node.
     As parameters, it takes a pda per read and a pda per stripe of user data written.
     It also takes as the last params the failed PDA, the raidPtr and the asm.
     For results, it takes PDA for P & Q. */
  rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
              nWriteNodes,                /* successors */
              nReadNodes,                 /* preds */
              nReadNodes + nWudNodes + 3, /* params */
              2 * nPQNodes,               /* results */
              dag_h, recoveryNodeName, allocList);
  for (i=0; i < nReadNodes; i++ )
  {
    recoveryNode->antecedents[i] = rrdNodes+i;
    recoveryNode->antType[i] = rf_control;
    recoveryNode->params[i].p = DISK_NODE_PDA(rrdNodes+i);
  }
  for (i=0; i < nWudNodes; i++)
  {
    recoveryNode->succedents[i] = wudNodes+i;
  }
  recoveryNode->params[nReadNodes+nWudNodes].p = asmap->failedPDAs[0];
  recoveryNode->params[nReadNodes+nWudNodes+1].p = raidPtr;
  recoveryNode->params[nReadNodes+nWudNodes+2].p = asmap;
  /* i continues from nWudNodes: the remaining successors are Wp/Wq */
  for ( ; i < nWriteNodes; i++)
    recoveryNode->succedents[i] = wudNodes+i;

  pda = pqPDAs;
  recoveryNode->results[0] = pda;
  pda++;
  recoveryNode->results[1] = pda;
  if ( nPQNodes == 2)
  {
    pda++;
    recoveryNode->results[2] = pda;
    pda++;
    recoveryNode->results[3] = pda;
  }

  /* fill writes: every write node sits between recovery and unblock */
#define INIT_WRITE_NODE(node,name) \
  rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList); \
  (node)->succedents[0] = unblockNode; \
  (node)->antecedents[0] = recoveryNode; \
  (node)->antType[0] = rf_control;

  /*
   * NOTE(review): this takes the first nWudNodes entries of asmap->physInfo
   * without checking for the failed PDA, although the diagram above says
   * Wud excludes failed disks -- confirm that the failed PDA cannot appear
   * among them.
   */
  pda = asmap->physInfo;
  for (i=0; i < nWudNodes; i++)
  {
    INIT_WRITE_NODE(wudNodes+i,"Wd");
    DISK_NODE_PARAMS(wudNodes[i],pda);
    recoveryNode->params[nReadNodes+i].p = DISK_NODE_PDA(wudNodes+i);
    pda = pda->next;
  }

  /* write redundancy pdas */
  pda = pqPDAs;
  INIT_WRITE_NODE(wpNodes,"Wp");
  RF_ASSERT(pda);
  DISK_NODE_PARAMS(wpNodes[0],pda);
  pda++;
  INIT_WRITE_NODE(wqNodes,"Wq");
  RF_ASSERT(pda);
  DISK_NODE_PARAMS(wqNodes[0],pda);
  if (nPQNodes==2)
  {
    pda++;
    INIT_WRITE_NODE(wpNodes+1,"Wp");
    RF_ASSERT(pda);
    DISK_NODE_PARAMS(wpNodes[1],pda);
    pda++;
    INIT_WRITE_NODE(wqNodes+1,"Wq");
    RF_ASSERT(pda);
    DISK_NODE_PARAMS(wqNodes[1],pda);
  }
}

View File

@ -0,0 +1,80 @@
/* $NetBSD: rf_dagdegwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_dagdegwr.h,v
* Revision 1.6 1996/07/31 16:30:28 jimz
* asm/asmap fix (EO merge)
*
* Revision 1.5 1996/07/31 15:35:38 jimz
* evenodd changes; bugfixes for double-degraded archs, generalize
* some formerly PQ-only functions
*
* Revision 1.4 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.3 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/03 19:21:28 wvcii
* Initial revision
*
*/
#ifndef _RF__RF_DAGDEGWR_H_
#define _RF__RF_DAGDEGWR_H_
/* degraded write DAG creation routines */
/*
 * Entry point using the standard DAG-creation argument list
 * (raidPtr, asmap, dag_h, bp, flags, allocList).
 */
void rf_CreateDegradedWriteDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
/*
 * Common degraded-write DAG builder.  nfaults selects whether a second
 * redundancy write node ("Wnq") is created in addition to the parity write
 * ("Wnp"): it is built when nfaults == 2.  When allowBufferRecycle is
 * non-zero, a read buffer whose sector count matches the failed PDA may be
 * reused as the xor target buffer.
 * NOTE(review): redFunc is presumably the function run by the xor/recovery
 * node -- confirm in rf_dagdegwr.c.
 */
void rf_CommonCreateSimpleDegradedWriteDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
int nfaults, int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle);
/*
 * Generates the extra PDAs needed to write a stripe with failed data units:
 *   *pdap / *nNodep     - list (chained via ->next) and count of data PDAs
 *   *pqpdap / *nPQNodep - array of P,Q PDAs and the number of P (== Q) PDAs
 * All memory is registered on allocList.
 */
void rf_WriteGenerateFailedAccessASMs(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t **pdap,
int *nNodep, RF_PhysDiskAddr_t **pqpdap,
int *nPQNodep, RF_AllocListElem_t *allocList);
/*
 * Builds the small-write DAG for a stripe with one failed data unit in a
 * P+Q style array: read surviving data and P/Q, run recovFunc in a node
 * named recoveryNodeName, then write user data and new P/Q.
 * redundantReadNodeName names the read-Q nodes.
 */
void rf_DoubleDegSmallWrite(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList, char *redundantReadNodeName,
char *redundantWriteNodeName, char *recoveryNodeName,
int (*recovFunc)(RF_DagNode_t *));
#endif /* !_RF__RF_DAGDEGWR_H_ */

View File

@ -0,0 +1,499 @@
/* $NetBSD: rf_dagffrd.c,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_dagffrd.c
*
* code for creating fault-free read DAGs
*
* :
* Log: rf_dagffrd.c,v
* Revision 1.14 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.13 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.12 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.11 1996/06/06 17:30:44 jimz
* turn old Raid1 mirror read creation into a more generic function
* parameterized by an additional parameter: type of mirrored read
* this is now used by other dag creation routines so chained declustering
* and raid1 can share dag creation code, but have different mirroring
* policies
*
* Revision 1.10 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.9 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.8 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.7 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.6 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.5 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.4 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.1 1996/05/03 19:19:20 wvcii
* Initial revision
*
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_debugMem.h"
#include "rf_memchunk.h"
#include "rf_general.h"
#include "rf_dagffrd.h"
/******************************************************************************
*
* General comments on DAG creation:
*
* All DAGs in this file use roll-away error recovery. Each DAG has a single
* commit node, usually called "Cmt." If an error occurs before the Cmt node
* is reached, the execution engine will halt forward execution and work
* backward through the graph, executing the undo functions. Assuming that
* each node in the graph prior to the Cmt node is undoable and atomic - or -
* does not make changes to permanent state, the graph will fail atomically.
* If an error occurs after the Cmt node executes, the engine will roll-forward
* through the graph, blindly executing nodes until it reaches the end.
* If a graph reaches the end, it is assumed to have completed successfully.
*
* A graph has only 1 Cmt node.
*
*/
/******************************************************************************
*
* The following wrappers map the standard DAG creation interface to the
* DAG creation routines. Additionally, these wrappers enable experimentation
* with new DAG structures by providing an extra level of indirection, allowing
* the DAG creation routines to be replaced at this single point.
*/
/*
 * Standard-interface wrapper for fault-free reads: forwards every argument
 * unchanged to rf_CreateNonredundantDAG and selects the read flavour of
 * that DAG by passing RF_IO_TYPE_READ.
 */
void rf_CreateFaultFreeReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
  RF_AllocListElem_t *allocList)
{
  rf_CreateNonredundantDAG(
    raidPtr,
    asmap,
    dag_h,
    bp,
    flags,
    allocList,
    RF_IO_TYPE_READ);
}
/******************************************************************************
*
* DAG creation code begins here
*/
/******************************************************************************
*
* creates a DAG to perform a nonredundant read or write of data within one
* stripe.
* For reads, this DAG is as follows:
*
* /---- read ----\
* Header -- Block ---- read ---- Commit -- Terminate
* \---- read ----/
*
* For writes, this DAG is as follows:
*
*                             /---- write ----\
* Header -- Block -- Commit ---- write ---- Terminate
*                             \---- write ----/
*
* There is one disk node per stripe unit accessed, and all disk nodes are in
* parallel.
*
* Tricky point here: The first disk node (read or write) is created
* normally. Subsequent disk nodes are created by copying the first one,
* and modifying a few params. The "succedents" and "antecedents" fields are
* _not_ re-created in each node, but rather left pointing to the same array
* that was malloc'd when the first node was created. Thus, it's essential
* that when this DAG is freed, the succedents and antecedents fields be freed
* in ONLY ONE of the read nodes. This does not apply to the "params" field
* because it is recreated for each READ node.
*
* Note that normal-priority accesses do not need to be tagged with their
* parity stripe ID, because they will never be promoted. Hence, I've
* commented-out the code to do this, and marked it with UNNEEDED.
*
*****************************************************************************/
/*
 * rf_CreateNonredundantDAG
 *
 * Builds a DAG that reads or writes, with no redundancy involved, the
 * stripe units described by asmap->physInfo.  One disk node is created per
 * stripe unit accessed and all disk nodes run in parallel.
 *
 *   read:   Hdr -> Block -> { R ... R } -> Commit -> Term
 *   write:  Hdr -> Block -> Commit -> { W ... W } -> Term
 *
 * type must be RF_IO_TYPE_READ or RF_IO_TYPE_WRITE; anything else panics.
 * raidPtr, bp and flags are not referenced here.  All memory is registered
 * on allocList.
 */
void rf_CreateNonredundantDAG(
  RF_Raid_t *raidPtr,
  RF_AccessStripeMap_t *asmap,
  RF_DagHeader_t *dag_h,
  void *bp,
  RF_RaidAccessFlags_t flags,
  RF_AllocListElem_t *allocList,
  RF_IoType_t type)
{
  RF_DagNode_t *nodeArr, *ioNodes, *blockNode, *commitNode, *termNode;
  RF_PhysDiskAddr_t *curPda;
  int (*ioFunc)(RF_DagNode_t *);
  int (*ioUndoFunc)(RF_DagNode_t *);
  char *ioName;
  int numUnits, nodeCount, k;
  int isRead = (type == RF_IO_TYPE_READ);
  int isWrite = (type == RF_IO_TYPE_WRITE);
  int readSideFan, writeSideFan;

  numUnits = asmap->numStripeUnitsAccessed;
  dag_h->creator = "NonredundantDAG";

  /* choose the disk function pair and node name for this I/O type */
  RF_ASSERT(RF_IO_IS_R_OR_W(type));
  if (isRead) {
    ioFunc = rf_DiskReadFunc;
    ioUndoFunc = rf_DiskReadUndoFunc;
    ioName = "R ";
    if (rf_dagDebug) printf("[Creating non-redundant read DAG]\n");
  }
  else if (isWrite) {
    ioFunc = rf_DiskWriteFunc;
    ioUndoFunc = rf_DiskWriteUndoFunc;
    ioName = "W ";
    if (rf_dagDebug) printf("[Creating non-redundant write DAG]\n");
  }
  else {
    RF_PANIC();
  }

  /*
   * Reads cannot commit until every read node has run (the commit node
   * follows them); writes commit before any disk node is started.
   */
  dag_h->numCommitNodes = 1;
  dag_h->numCommits = 0;
  dag_h->numSuccedents = 1;

  /*
   * Node array layout: numUnits disk nodes, then the block node, the
   * commit node and the terminator.
   */
  RF_ASSERT(numUnits > 0);
  nodeCount = numUnits + 3;
  RF_CallocAndAdd(nodeArr, nodeCount, sizeof(RF_DagNode_t),
                  (RF_DagNode_t *), allocList);
  ioNodes = nodeArr;
  blockNode = ioNodes + numUnits;
  commitNode = blockNode + 1;
  termNode = commitNode + 1;
  RF_ASSERT((termNode - nodeArr) + 1 == nodeCount);

  /*
   * The disk nodes hang off the block node for reads and off the commit
   * node for writes, so the fan-out/fan-in counts swap between the two.
   */
  readSideFan = isRead ? numUnits : 1;    /* block succedents, commit antecedents */
  writeSideFan = isRead ? 1 : numUnits;   /* commit succedents, term antecedents */
  if (isRead || isWrite) {
    rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
                NULL, readSideFan, 0, 0, 0, dag_h, "Nil", allocList);
    rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
                NULL, writeSideFan, readSideFan, 0, 0, dag_h, "Cmt", allocList);
    rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
                NULL, 0, writeSideFan, 0, 0, dag_h, "Trm", allocList);
  }
  else {
    RF_PANIC();
  }

  /* one disk node per PDA on the asmap's physInfo list */
  curPda = asmap->physInfo;
  for (k = 0; k < numUnits; k++, curPda = curPda->next) {
    RF_DagNode_t *io = &ioNodes[k];

    RF_ASSERT(curPda != NULL);
    rf_InitNode(io, rf_wait, RF_FALSE, ioFunc, ioUndoFunc, rf_GenericWakeupFunc,
                1, 1, 4, 0, dag_h, ioName, allocList);
    io->params[0].p = curPda;
    io->params[1].p = curPda->bufPtr;
    /* parity stripe id is not necessary */
    io->params[2].v = 0;
    io->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
  }

  /*
   * Wire the graph together.
   */
  /* header -> block */
  RF_ASSERT(blockNode->numAntecedents == 0);
  dag_h->succedents[0] = blockNode;

  if (!isRead) {
    /* write DAG: block -> commit -> writes -> term */
    RF_ASSERT(blockNode->numSuccedents == 1);
    RF_ASSERT(commitNode->numAntecedents == 1);
    blockNode->succedents[0] = commitNode;
    commitNode->antecedents[0] = blockNode;
    commitNode->antType[0] = rf_control;

    RF_ASSERT(commitNode->numSuccedents == numUnits);
    RF_ASSERT(termNode->numAntecedents == numUnits);
    RF_ASSERT(termNode->numSuccedents == 0);
    for (k = 0; k < numUnits; k++) {
      RF_DagNode_t *wr = &ioNodes[k];

      /* commit -> write */
      RF_ASSERT(wr->numAntecedents == 1);
      commitNode->succedents[k] = wr;
      wr->antecedents[0] = commitNode;
      wr->antType[0] = rf_control;
      /* write -> term */
      RF_ASSERT(wr->numSuccedents == 1);
      wr->succedents[0] = termNode;
      termNode->antecedents[k] = wr;
      termNode->antType[k] = rf_control;
    }
  }
  else {
    /* read DAG: block -> reads -> commit -> term */
    RF_ASSERT(blockNode->numSuccedents == numUnits);
    RF_ASSERT(commitNode->numAntecedents == numUnits);
    for (k = 0; k < numUnits; k++) {
      RF_DagNode_t *rd = &ioNodes[k];

      /* block -> read */
      RF_ASSERT(rd->numAntecedents == 1);
      blockNode->succedents[k] = rd;
      rd->antecedents[0] = blockNode;
      rd->antType[0] = rf_control;
      /* read -> commit */
      RF_ASSERT(rd->numSuccedents == 1);
      rd->succedents[0] = commitNode;
      commitNode->antecedents[k] = rd;
      commitNode->antType[k] = rf_control;
    }
    /* commit -> term */
    RF_ASSERT(commitNode->numSuccedents == 1);
    RF_ASSERT(termNode->numAntecedents == 1);
    RF_ASSERT(termNode->numSuccedents == 0);
    commitNode->succedents[0] = termNode;
    termNode->antecedents[0] = commitNode;
    termNode->antType[0] = rf_control;
  }
}
/******************************************************************************
 * Create a fault-free read DAG for RAID level 1
 *
 *   Hdr -> Nil -> Rmir -> Cmt -> Trm
 *
 * One "Rmir" node is created for every stripe unit accessed.  Each Rmir node
 * is handed both the data pda and the matching parity (mirror) pda; which of
 * the two disks is actually read is decided when the node executes, by the
 * supplied readfunc.  This deferred mapping is unlike other archs in
 * RAIDframe, which generally fix the mapping at DAG creation time.
 *
 * Parameters:  raidPtr   - description of the physical array (unused here)
 *              asmap     - logical & physical addresses for this access
 *              dag_h     - DAG header to fill in
 *              bp        - buffer ptr (for holding read data; unused here)
 *              flags     - general flags (e.g. disk locking; unused here)
 *              allocList - list of memory allocated in DAG creation
 *              readfunc  - execution function installed in every Rmir node
 *****************************************************************************/
static void CreateMirrorReadDAG(
  RF_Raid_t             *raidPtr,
  RF_AccessStripeMap_t  *asmap,
  RF_DagHeader_t        *dag_h,
  void                  *bp,
  RF_RaidAccessFlags_t   flags,
  RF_AllocListElem_t    *allocList,
  int                  (*readfunc)(RF_DagNode_t *node))
{
  RF_DagNode_t *pool, *rdNodes, *blkNode, *cmtNode, *trmNode;
  RF_PhysDiskAddr_t *dpda, *ppda;
  int k, nReads, nodeCount;

  nReads = asmap->numStripeUnitsAccessed;
  dag_h->creator = "RaidOneReadDAG";
  if (rf_dagDebug) {
    printf("[Creating RAID level 1 read DAG]\n");
  }

  /*
   * The dag cannot commit until the commit node is reached; any error
   * before that point means the dag has failed.
   */
  dag_h->numCommitNodes = 1;
  dag_h->numCommits = 0;
  dag_h->numSuccedents = 1;

  /*
   * One allocation holds every node, laid out as:
   *   nReads read nodes, then the block, commit and terminator nodes.
   */
  RF_ASSERT(nReads > 0);
  nodeCount = nReads + 3;
  RF_CallocAndAdd(pool, nodeCount, sizeof(RF_DagNode_t),
    (RF_DagNode_t *), allocList);
  rdNodes = pool;
  blkNode = rdNodes + nReads;
  cmtNode = blkNode + 1;
  trmNode = cmtNode + 1;
  RF_ASSERT((trmNode + 1) - pool == nodeCount);

  /* set up the three bookkeeping nodes */
  rf_InitNode(blkNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
    rf_NullNodeUndoFunc, NULL, nReads, 0, 0, 0, dag_h, "Nil", allocList);
  rf_InitNode(cmtNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
    rf_NullNodeUndoFunc, NULL, 1, nReads, 0, 0, dag_h, "Cmt", allocList);
  rf_InitNode(trmNode, rf_wait, RF_FALSE, rf_TerminateFunc,
    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

  /* set up one read node per stripe unit, walking both pda lists in step */
  dpda = asmap->physInfo;
  ppda = asmap->parityInfo;
  for (k = 0; k < nReads; k++, dpda = dpda->next, ppda = ppda->next) {
    RF_DagNode_t *rd = &rdNodes[k];

    RF_ASSERT(dpda != NULL);
    RF_ASSERT(ppda != NULL);
    rf_InitNode(rd, rf_wait, RF_FALSE, readfunc,
      rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h,
      "Rmir", allocList);
    rd->params[0].p = dpda;
    rd->params[1].p = dpda->bufPtr;
    /* parity stripe id is not necessary */
    rd->params[2].p = 0;
    rd->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
    rd->params[4].p = ppda;
  }

  /*
   * Wire the graph together.
   */

  /* hdr -> block */
  RF_ASSERT(blkNode->numAntecedents == 0);
  dag_h->succedents[0] = blkNode;

  /* block -> every read node -> commit */
  RF_ASSERT(blkNode->numSuccedents == nReads);
  RF_ASSERT(cmtNode->numAntecedents == nReads);
  for (k = 0; k < nReads; k++) {
    RF_DagNode_t *rd = &rdNodes[k];

    RF_ASSERT(rd->numAntecedents == 1);
    RF_ASSERT(rd->numSuccedents == 1);
    blkNode->succedents[k] = rd;
    rd->antecedents[0] = blkNode;
    rd->antType[0] = rf_control;
    rd->succedents[0] = cmtNode;
    cmtNode->antecedents[k] = rd;
    cmtNode->antType[k] = rf_control;
  }

  /* commit -> terminator */
  RF_ASSERT(cmtNode->numSuccedents == 1);
  RF_ASSERT(trmNode->numAntecedents == 1);
  RF_ASSERT(trmNode->numSuccedents == 0);
  cmtNode->succedents[0] = trmNode;
  trmNode->antecedents[0] = cmtNode;
  trmNode->antType[0] = rf_control;
}
/*
 * Create a RAID level 1 fault-free read DAG whose read nodes execute
 * rf_DiskReadMirrorIdleFunc.  Every argument is handed through unchanged to
 * CreateMirrorReadDAG(), which does all of the work.
 */
void rf_CreateMirrorIdleReadDAG(
RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h,
void *bp,
RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList)
{
CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
rf_DiskReadMirrorIdleFunc);
}
/*
 * Create a RAID level 1 fault-free read DAG whose read nodes execute
 * rf_DiskReadMirrorPartitionFunc.  Every argument is handed through
 * unchanged to CreateMirrorReadDAG(), which does all of the work.
 */
void rf_CreateMirrorPartitionReadDAG(
RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h,
void *bp,
RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList)
{
CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
rf_DiskReadMirrorPartitionFunc);
}

View File

@ -0,0 +1,74 @@
/* $NetBSD: rf_dagffrd.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_dagffrd.h,v
* Revision 1.5 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.4 1996/06/06 17:31:13 jimz
* new mirror read creation dags
*
* Revision 1.3 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/03 19:19:53 wvcii
* Initial revision
*
*/
#ifndef _RF__RF_DAGFFRD_H_
#define _RF__RF_DAGFFRD_H_
#include "rf_types.h"
/*
 * fault-free read DAG creation routines
 *
 * Arguments shared by all of them:
 *   raidPtr   - description of the physical array
 *   asmap     - logical & physical addresses for this access
 *   dag_h     - DAG header to be filled in
 *   bp        - buffer ptr
 *   flags     - general access flags
 *   allocList - list of memory allocated in DAG creation
 */
/* NOTE(review): definition not visible from this header; presumably the
 * general (non-mirrored) fault-free read DAG -- confirm against rf_dagffrd.c */
void rf_CreateFaultFreeReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList);
/* NOTE(review): takes an extra RF_IoType_t; presumably "type" selects a read
 * or a write DAG -- confirm against the definition */
void rf_CreateNonredundantDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList, RF_IoType_t type);
/* RAID level 1 read DAG whose read nodes run rf_DiskReadMirrorIdleFunc */
void rf_CreateMirrorIdleReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
/* RAID level 1 read DAG whose read nodes run rf_DiskReadMirrorPartitionFunc */
void rf_CreateMirrorPartitionReadDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
#endif /* !_RF__RF_DAGFFRD_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,102 @@
/* $NetBSD: rf_dagffwr.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_dagffwr.h,v
* Revision 1.6 1996/07/31 15:35:29 jimz
* evenodd changes; bugfixes for double-degraded archs, generalize
* some formerly PQ-only functions
*
* Revision 1.5 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.4 1996/06/10 22:25:28 wvcii
* added write dags which do not have a commit node and are
* used in forward and backward error recovery experiments.
*
* Revision 1.3 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/03 19:20:18 wvcii
* Initial revision
*
*/
#ifndef _RF__RF_DAGFFWR_H_
#define _RF__RF_DAGFFWR_H_
#include "rf_types.h"
/*
 * fault-free write DAG creation routines
 *
 * Every routine takes the array description (raidPtr), the stripe map for
 * the access (asmap), the DAG header to fill in (dag_h), a buffer pointer
 * (bp), the access flags (flags) and the allocation list that owns memory
 * obtained during DAG creation (allocList).
 *
 * NOTE(review): the "...Fwd" variants are presumably the commit-node-free
 * DAGs that the revision log describes as used for forward/backward error
 * recovery experiments -- confirm against rf_dagffwr.c.
 */
void rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
RF_IoType_t type);
void rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList, RF_IoType_t type);
void rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList);
void rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList);
/* "Common" large-write creators: additionally take the number of faults
 * (nfaults), the redundancy node function (redFunc) and a flag saying whether
 * buffers may be recycled (allowBufferRecycle). */
void rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults,
int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle);
void rf_CommonCreateLargeWriteDAGFwd(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList, int nfaults,
int (*redFunc)(RF_DagNode_t *), int allowBufferRecycle);
/* "Common" small-write creators: additionally take two redundancy function
 * tables, pfuncs and qfuncs. */
void rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs);
void rf_CommonCreateSmallWriteDAGFwd(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs);
/* RAID level 1 write DAG creators */
void rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
RF_AllocListElem_t *allocList);
void rf_CreateRaidOneWriteDAGFwd(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList);
#endif /* !_RF__RF_DAGFFWR_H_ */

View File

@ -0,0 +1,85 @@
/* $NetBSD: rf_dagflags.h,v 1.1 1998/11/13 04:20:27 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/**************************************************************************************
*
* dagflags.h -- flags that can be given to DoAccess
* I pulled these out of dag.h because routines that call DoAccess may need these flags,
* but certainly do not need the declarations related to the DAG data structures.
*
**************************************************************************************/
/* :
* Log: rf_dagflags.h,v
* Revision 1.10 1996/06/13 19:08:23 jimz
* remove unused BD flag
*
* Revision 1.9 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.8 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.7 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.6 1995/12/01 15:59:40 root
* added copyright info
*
*/
#ifndef _RF__RF_DAGFLAGS_H_
#define _RF__RF_DAGFLAGS_H_
/*
* Bitmasks for the "flags" parameter (RF_RaidAccessFlags_t) used
* by DoAccess, SelectAlgorithm, and the DAG creation routines.
* Each flag occupies its own bit, so flags may be OR'ed together.
*
* If USE_DAG or USE_ASM is specified, neither the DAG nor the ASM
* will be modified, which means that you can't SUPPRESS if you
* specify USE_DAG.
*
* NOTE(review): no USE_DAG or USE_ASM flag is defined in this list;
* the paragraph above may refer to flags that no longer exist -- confirm.
*/
#define RF_DAG_FLAGS_NONE 0 /* no flags */
#define RF_DAG_SUPPRESS_LOCKS (1<<0) /* suppress all stripe locks in the DAG */
#define RF_DAG_RETURN_ASM (1<<1) /* create an ASM and return it instead of freeing it */
#define RF_DAG_RETURN_DAG (1<<2) /* create a DAG and return it instead of freeing it */
#define RF_DAG_NONBLOCKING_IO (1<<3) /* cause DoAccess to be non-blocking */
#define RF_DAG_ACCESS_COMPLETE (1<<4) /* the access is complete */
#define RF_DAG_DISPATCH_RETURNED (1<<5) /* used to handle the case where the dag invokes no I/O */
#define RF_DAG_TEST_ACCESS (1<<6) /* this access came through rf_ioctl instead of rf_strategy */
#endif /* !_RF__RF_DAGFLAGS_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,137 @@
/* $NetBSD: rf_dagfuncs.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, William V. Courtright II, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* dagfuncs.h -- header file for DAG node execution routines
*
****************************************************************************************/
/*
* :
* Log: rf_dagfuncs.h,v
* Revision 1.17 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.16 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.15 1996/06/06 17:27:20 jimz
* added another read mirror func (partitioning), changed names so dag
* creation routines can use the appropriate one
*
* Revision 1.14 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.13 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.12 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.11 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.10 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.9 1995/12/01 15:56:46 root
* added copyright info
*
* Revision 1.8 1995/11/07 16:25:23 wvcii
* added DiskUnlockFuncForThreads
*
*/
#ifndef _RF__RF_DAGFUNCS_H_
#define _RF__RF_DAGFUNCS_H_
/* one-time setup; the function pointers declared further down are defined
 * in ConfigureDAGFuncs() */
int rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp);
/*
 * DAG node execution ("do") and undo routines.  Each receives the node being
 * processed and has the signature expected by the doFunc/undoFunc arguments
 * of rf_InitNode().
 */
int rf_TerminateFunc(RF_DagNode_t *node);
int rf_TerminateUndoFunc(RF_DagNode_t *node);
int rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node);
int rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node);
int rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node);
int rf_ParityLogUpdateFunc(RF_DagNode_t *node);
int rf_ParityLogOverwriteFunc(RF_DagNode_t *node);
int rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node);
int rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node);
int rf_NullNodeFunc(RF_DagNode_t *node);
int rf_NullNodeUndoFunc(RF_DagNode_t *node);
int rf_DiskReadFuncForThreads(RF_DagNode_t *node);
int rf_DiskWriteFuncForThreads(RF_DagNode_t *node);
int rf_DiskUndoFunc(RF_DagNode_t *node);
int rf_DiskUnlockFuncForThreads(RF_DagNode_t *node);
/* wakeup routine: matches the wakeFunc argument of rf_InitNode(), which
 * takes a status in addition to the node */
int rf_GenericWakeupFunc(RF_DagNode_t *node, int status);
int rf_RegularXorFunc(RF_DagNode_t *node);
int rf_SimpleXorFunc(RF_DagNode_t *node);
int rf_RecoveryXorFunc(RF_DagNode_t *node);
/* buffer xor helpers (not node functions) */
int rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
char *targbuf, void *bp);
int rf_bxor(char *src, char *dest, int len, void *bp);
int rf_longword_bxor(register unsigned long *src, register unsigned long *dest,
int len, void *bp);
int rf_longword_bxor3(register unsigned long *dest, register unsigned long *a,
register unsigned long *b, register unsigned long *c, int len, void *bp);
int rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
unsigned char *c, unsigned long len, void *bp);
/* function ptrs defined in ConfigureDAGFuncs() */
extern int (*rf_DiskReadFunc)(RF_DagNode_t *);
extern int (*rf_DiskWriteFunc)(RF_DagNode_t *);
extern int (*rf_DiskReadUndoFunc)(RF_DagNode_t *);
extern int (*rf_DiskWriteUndoFunc)(RF_DagNode_t *);
extern int (*rf_DiskUnlockFunc)(RF_DagNode_t *);
extern int (*rf_DiskUnlockUndoFunc)(RF_DagNode_t *);
extern int (*rf_SimpleXorUndoFunc)(RF_DagNode_t *);
extern int (*rf_RegularXorUndoFunc)(RF_DagNode_t *);
extern int (*rf_RecoveryXorUndoFunc)(RF_DagNode_t *);
/*
 * macros for manipulating the param[3] in a read or write node
 *
 * Layout of the packed value, as produced by RF_CREATE_PARAM3 and consumed
 * by the RF_EXTRACT_* macros:
 *   bits 0-3   priority      (pri,  masked with 0xF)
 *   bit  4     lock flag     (set when lk is nonzero)
 *   bit  5     unlock flag   (set when unlk is nonzero)
 *   bits 8-31  24-bit ru field (wru, masked with 0xFFFFFF)
 *
 * The wru argument is parenthesized and widened to RF_uint64 before it is
 * masked and shifted, so that an expression argument (e.g. "a | b") is
 * masked as a whole and a value with bit 23 set does not overflow a
 * 32-bit int when shifted left by 8.
 */
#define RF_CREATE_PARAM3(pri, lk, unlk, wru) \
  ((RF_uint64) ((((RF_uint64) (wru) & 0xFFFFFF) << 8) | \
                ((lk) ? 0x10 : 0) | ((unlk) ? 0x20 : 0) | ((pri) & 0xF)))
#define RF_EXTRACT_PRIORITY(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 0) & 0x0F)
#define RF_EXTRACT_LOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 4) & 0x1)
#define RF_EXTRACT_UNLOCK_FLAG(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 5) & 0x1)
#define RF_EXTRACT_RU(_x_) ((((unsigned) ((unsigned long)(_x_))) >> 8) & 0xFFFFFF)
#endif /* !_RF__RF_DAGFUNCS_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,191 @@
/* $NetBSD: rf_dagutils.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*************************************************************************
*
* rf_dagutils.h -- header file for utility routines for manipulating DAGs
*
*************************************************************************/
/*
* :
* Log: rf_dagutils.h,v
* Revision 1.19 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.18 1996/07/15 17:22:18 jimz
* nit-pick code cleanup
* resolve stdlib problems on DEC OSF
*
* Revision 1.17 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.16 1996/06/06 17:27:46 jimz
* added another select mirror func (partitioning), changed names so dag
* creation routines can use the appropriate one
*
* fixed old idle mirror func to pick closest arm if queue lengths are equal
*
* Revision 1.15 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.14 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.13 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.12 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.11 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.10 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.9 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.8 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.7 1996/05/03 19:55:27 wvcii
* added misc routines from old dag creation files
*
* Revision 1.6 1995/12/01 15:57:28 root
* added copyright info
*
* Revision 1.5 1995/11/07 16:21:36 wvcii
* modified InitNode and InitNodeFromBuf prototypes
*
*/
#include "rf_types.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#ifndef _RF__RF_DAGUTILS_H_
#define _RF__RF_DAGUTILS_H_
/*
 * A pair of DAG node functions ("regular" and "simple"), each with an
 * associated name string.
 * NOTE(review): presumably the redundancy-computation functions handed to
 * the small-write DAG creators as pfuncs/qfuncs -- confirm against callers.
 */
struct RF_RedFuncs_s {
int (*regular)(RF_DagNode_t *);
char *RegularName;
int (*simple)(RF_DagNode_t *);
char *SimpleName;
};
extern RF_RedFuncs_t rf_xorFuncs;
extern RF_RedFuncs_t rf_xorRecoveryFuncs;
/*
 * Initialize one DAG node.
 *   initstatus      - initial node status (e.g. rf_wait)
 *   commit          - nonzero (RF_TRUE) for a commit node
 *   doFunc/undoFunc - execution and undo routines for the node
 *   wakeFunc        - wakeup routine (may be NULL)
 *   nSucc, nAnte    - number of succedents / antecedents of the node
 *   nParam, nResult - number of params / results of the node
 *   hdr             - DAG header the node belongs to
 *   name            - short node name (e.g. "Nil", "Cmt", "Trm")
 *   alist           - allocation list
 */
void rf_InitNode(RF_DagNode_t *node, RF_NodeStatus_t initstatus,
int commit,
int (*doFunc)(RF_DagNode_t *node),
int (*undoFunc)(RF_DagNode_t *node),
int (*wakeFunc)(RF_DagNode_t *node, int status),
int nSucc, int nAnte, int nParam, int nResult,
RF_DagHeader_t *hdr, char *name, RF_AllocListElem_t *alist);
void rf_FreeDAG(RF_DagHeader_t *dag_h);
RF_PropHeader_t *rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum,
int paramNum, RF_PropHeader_t *next, RF_AllocListElem_t *allocList);
int rf_ConfigureDAGs(RF_ShutdownList_t **listp);
RF_DagHeader_t *rf_AllocDAGHeader(void);
void rf_FreeDAGHeader(RF_DagHeader_t *dh);
void *rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h,
RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList);
/* debugging / inspection helpers */
char *rf_NodeStatusString(RF_DagNode_t *node);
void rf_PrintNodeInfoString(RF_DagNode_t *node);
int rf_AssignNodeNums(RF_DagHeader_t *dag_h);
int rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited);
void rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr);
void rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr);
void rf_PrintDAGList(RF_DagHeader_t *dag_h);
int rf_ValidateDAG(RF_DagHeader_t *dag_h);
/* misc routines from the old dag creation files (see revision 1.7 in the log) */
void rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
void rf_MapUnaccessedPortionOfStripe(RF_Raid_t *raidPtr,
RF_RaidLayout_t *layoutPtr,
RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h,
RF_AccessStripeMapHeader_t **new_asm_h, int *nRodNodes, char **sosBuffer,
char **eosBuffer, RF_AllocListElem_t *allocList);
int rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src,
RF_PhysDiskAddr_t *dest);
void rf_GenerateFailedAccessASMs(RF_Raid_t *raidPtr,
RF_AccessStripeMap_t *asmap, RF_PhysDiskAddr_t *failedPDA,
RF_DagHeader_t *dag_h, RF_AccessStripeMapHeader_t **new_asm_h,
int *nXorBufs, char **rpBufPtr, char *overlappingPDAs,
RF_AllocListElem_t *allocList);
/* flags used by RangeRestrictPDA */
/* NOTE(review): presumably the values for its "dobuffer" argument -- confirm */
#define RF_RESTRICT_NOBUFFER 0
#define RF_RESTRICT_DOBUFFER 1
void rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src,
RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr);
int rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda);
/* mirror-disk selection routines, one per policy ("idle", "partition") */
void rf_SelectMirrorDiskIdle(RF_DagNode_t *node);
void rf_SelectMirrorDiskPartition(RF_DagNode_t *node);
#endif /* !_RF__RF_DAGUTILS_H_ */

View File

@ -0,0 +1,577 @@
/* $NetBSD: rf_debugMem.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky, Mark Holland, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* debugMem.c: memory usage debugging stuff.
* Malloc, Calloc, and Free are #defined everywhere
* to do_malloc, do_calloc, and do_free.
*
* if RF_UTILITY is nonzero, it means we're compiling one of the
* raidframe utility programs, such as rfctrl or smd. In this
* case, we eliminate all references to the threads package
* and to the allocation list stuff.
*/
/* :
* Log: rf_debugMem.c,v
* Revision 1.38 1996/08/20 14:45:43 jimz
* add debugging to track memory allocated (amount only, w/out
* excessive sanity checking)
*
* Revision 1.37 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.36 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.35 1996/06/13 08:55:38 jimz
* make error messages refer to file, line of original
* allocation
*
* Revision 1.34 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.33 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.32 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.31 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.30 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.29 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.28 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.27 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.26 1996/05/21 18:53:46 jimz
* return NULL for failed allocations, not panic
*
* Revision 1.25 1996/05/20 16:14:19 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.24 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.23 1996/05/17 12:42:35 jimz
* wrap get_threadid stuff in #ifndef UTILITY for utils which use
* redzone allocation stuff
*
* Revision 1.22 1996/05/16 23:06:09 jimz
* don't warn about NULL alists
*
* Revision 1.21 1996/05/16 22:25:02 jimz
* show allocations for [MC]allocAndAdd
*
* Revision 1.20 1996/05/15 18:30:22 jimz
* print memory allocation as well as frees if memDebug > 1
*
* Revision 1.19 1996/05/07 17:41:17 jimz
* add "level 2" for memDebug, which will print freed address ranges
*
* Revision 1.18 1996/05/02 20:41:53 jimz
* really fix malloc problem out-of-kernel in memory_hash_insert()
*
* Revision 1.17 1996/05/02 20:04:29 jimz
* fixed malloc deadlock previous change introduced
*
* Revision 1.16 1996/05/01 16:27:26 jimz
* get rid of ALLOCMH
* stop using ccmn_ memory management
*
* Revision 1.15 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.14 1995/12/01 15:56:17 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_sys.h"
#if RF_UTILITY == 0
#include "rf_threadstuff.h"
#include "rf_threadid.h"
#include "rf_options.h"
#else /* RF_UTILITY == 0 */
#include "rf_utility.h"
#endif /* RF_UTILITY == 0 */
#ifndef KERNEL
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#endif /* !KERNEL */
#include "rf_debugMem.h"
#include "rf_general.h"
/* NOTE(review): presumably current and peak byte counts maintained by the
 * allocation-recording code; their updates are not visible here -- confirm */
static long tot_mem_in_use = 0, max_mem = 0;
/* Hash table of information about memory allocations */
#define RF_MH_TABLESIZE 1000
/*
 * One hash-table entry.  The address/size/line/filen fields mirror the
 * arguments of memory_hash_insert(); entries are chained through "next".
 */
struct mh_struct {
void *address;
int size;
int line;
char *filen;
char allocated; /* NOTE(review): presumably nonzero while the block is live -- confirm */
struct mh_struct *next;
};
static struct mh_struct *mh_table[RF_MH_TABLESIZE];
/* held by the rf_real_Malloc() family around allocation and bookkeeping */
RF_DECLARE_MUTEX(rf_debug_mem_mutex)
static int mh_table_initialized=0;
static void memory_hash_insert(void *addr, int size, int line, char *filen);
static int memory_hash_remove(void *addr, int sz);
#ifndef KERNEL /* no redzones or "real_" routines in the kernel */
/* called by rf_real_redzone_free() when a block's trailing '!' markers are
 * not both intact; receives the raw block pointer, the recorded size, and
 * the line/file passed to the free call */
static void rf_redzone_free_failed(void *ptr, int size, int line, char *file);
/*
 * rf_real_redzone_malloc -- allocate _size_ usable bytes wrapped in red zones.
 *
 * Raw block layout (_size_ + 16 bytes):
 *   [0 .. 7]                    leading zone; the requested size is stored
 *                               here as a long
 *   [8 .. _size_+7]             caller's data
 *   [_size_+8 .. _size_+15]     trailing zone; its first and last bytes are
 *                               set to '!'
 *
 * Returns a pointer to the caller's data, or NULL if malloc() fails.
 */
void *rf_real_redzone_malloc(_size_)
  int _size_;
{
  char *raw;

  rf_validate_mh_table();

  raw = malloc(_size_ + 16);
  if (raw != NULL) {
    RF_ASSERT (raw);
    /* record the size up front, then plant the trailing markers */
    *((long *) raw) = _size_;
    raw[_size_ + 8] = '!';
    raw[_size_ + 15] = '!';
    /* step over the leading zone */
    raw += 8;
  }
  return(raw);
}
/*
 * Zero-filled counterpart of rf_real_redzone_malloc(): allocates
 * _n_ * _size_ bytes plus the 16-byte envelope, clears the whole raw buffer,
 * then writes the size header and the two '!' marker bytes.
 *
 * Returns the address 8 bytes into the raw buffer, or NULL if malloc() fails.
 */
void *rf_real_redzone_calloc(_n_,_size_)
	int _n_,_size_;
{
	int total;
	char *raw;

	rf_validate_mh_table();

	total = _n_ * _size_;
	raw = malloc(total + 16);
	if (!raw)
		return (raw);

	bzero(raw, total + 16);
	*(long *) raw = total;
	raw[total + 8] = '!';
	raw[total + 15] = '!';

	return (raw + 8);
}
/*
 * Release a buffer obtained from rf_real_redzone_malloc()/calloc().
 *
 * p is the pointer that was handed to the caller; the raw allocation starts
 * 8 bytes earlier and begins with the recorded size.  If either '!' marker
 * byte after the data is no longer intact, rf_redzone_free_failed() is called
 * with the caller-supplied line/filen before the raw buffer is freed.
 */
void rf_real_redzone_free(p, line, filen)
	char *p;
	int line;
	char *filen;
{
	char *raw;
	unsigned long recorded;
	int intact;

	rf_validate_mh_table();

	raw = p - 8;
	recorded = *(long *) raw;
	intact = (raw[recorded + 8] == '!') && (raw[recorded + 15] == '!');
	if (!intact)
		rf_redzone_free_failed(raw, recorded, line, filen);

	free(raw);
}
/* Running byte count printed by the allocation wrappers when rf_memAmtDebug is set. */
unsigned long rf_mem_alloc = 0;
/*
 * User-level backend for the RF_Malloc() macro.
 *
 * size        number of bytes requested
 * line, file  call site, used for diagnostics and for the allocation record
 *
 * Returns the new buffer, or NULL if the underlying allocation failed.
 *
 * The whole operation runs under rf_debug_mem_mutex.  On failure the error
 * is reported, the mutex is released and NULL is returned immediately, so a
 * NULL pointer is never counted, traced or entered into the allocation
 * table.
 */
char *rf_real_Malloc(size, line, file)
	int size;
	int line;
	char *file;
{
	void *pp;
	char *p;
	int tid;

	RF_LOCK_MUTEX(rf_debug_mem_mutex);
	rf_redzone_malloc(pp, size);
	p = pp;
	if (p == NULL) {
		RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size,
		    line, file);
		RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
		return (NULL);
	}
	if (rf_memAmtDebug) {
		rf_mem_alloc += size;
		printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
	}
#if RF_UTILITY == 0
	if (rf_memDebug > 1) {
		rf_get_threadid(tid);
		/* %lx takes an unsigned long, so convert the pointers explicitly */
		printf("[%d] malloc 0x%lx - 0x%lx (%d) %s %d\n", tid,
		    (unsigned long) p, (unsigned long) (p + size), size,
		    file, line);
	}
#endif /* RF_UTILITY == 0 */
	if (rf_memDebug)
		rf_record_malloc(p, size, line, file);
	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
	return (p);
}
#if RF_UTILITY == 0
/*
 * User-level backend for the RF_MallocAndAdd() macro: allocate size bytes
 * and, when alist is non-NULL, register the buffer on that allocation list.
 *
 * size        number of bytes requested
 * alist       allocation list to add the buffer to, or NULL for none
 * line, file  call site, used for diagnostics and for the allocation record
 *
 * Returns the new buffer, or NULL if the underlying allocation failed.
 *
 * The whole operation runs under rf_debug_mem_mutex.  On failure the error
 * is reported, the mutex is released and NULL is returned immediately, so a
 * NULL pointer is never added to alist or to the allocation table.
 */
char *rf_real_MallocAndAdd(size, alist, line, file)
	int size;
	RF_AllocListElem_t *alist;
	int line;
	char *file;
{
	void *pp;
	char *p;
	int tid;

	RF_LOCK_MUTEX(rf_debug_mem_mutex);
	rf_redzone_malloc(pp, size);
	p = pp;
	if (p == NULL) {
		RF_ERRORMSG3("Unable to malloc %d bytes at line %d file %s\n", size,
		    line, file);
		RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
		return (NULL);
	}
	if (rf_memAmtDebug) {
		rf_mem_alloc += size;
		printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
	}
	if (rf_memDebug > 1) {
		rf_get_threadid(tid);
		/* %lx takes an unsigned long, so convert the pointers explicitly */
		printf("[%d] malloc+add 0x%lx - 0x%lx (%d) %s %d\n", tid,
		    (unsigned long) p, (unsigned long) (p + size),
		    size, file, line);
	}
	if (alist) {
		rf_real_AddToAllocList(alist, pp, size, 0);
	}
	if (rf_memDebug)
		rf_record_malloc(p, size, line, file);
	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
	return (p);
}
#endif /* RF_UTILITY == 0 */
/*
 * User-level backend for the RF_Calloc() macro: allocate a zero-filled
 * array of nel elements of elsz bytes each.
 *
 * nel, elsz   element count and element size
 * line, file  call site, used for diagnostics and for the allocation record
 *
 * Returns the new buffer, or NULL if the underlying allocation failed.
 *
 * The whole operation runs under rf_debug_mem_mutex; the mutex is released
 * on the failure path as well, so a failed allocation cannot leave it held.
 */
char *rf_real_Calloc(nel, elsz, line, file)
	int nel;
	int elsz;
	int line;
	char *file;
{
	int tid, size;
	void *pp;
	char *p;

	size = nel * elsz;
	RF_LOCK_MUTEX(rf_debug_mem_mutex);
	rf_redzone_calloc(pp, nel, elsz);
	p = pp;
	if (p == NULL) {
		RF_ERRORMSG4("Unable to calloc %d objects of size %d at line %d file %s\n",
		    nel, elsz, line, file);
		/* do not return with the debug-memory mutex still held */
		RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
		return (NULL);
	}
	if (rf_memAmtDebug) {
		rf_mem_alloc += size;
		printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
	}
#if RF_UTILITY == 0
	if (rf_memDebug > 1) {
		rf_get_threadid(tid);
		/* %lx takes an unsigned long, so convert the pointers explicitly */
		printf("[%d] calloc 0x%lx - 0x%lx (%d,%d) %s %d\n", tid,
		    (unsigned long) p, (unsigned long) (p + size), nel,
		    elsz, file, line);
	}
#endif /* RF_UTILITY == 0 */
	if (rf_memDebug) {
		rf_record_malloc(p, size, line, file);
	}
	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
	return (p);
}
#if RF_UTILITY == 0
/*
 * User-level backend for the RF_CallocAndAdd() macro: allocate a zero-filled
 * array of nel elements of elsz bytes each and, when alist is non-NULL,
 * register the buffer on that allocation list.
 *
 * nel, elsz   element count and element size
 * alist       allocation list to add the buffer to, or NULL for none
 * line, file  call site, used for diagnostics and for the allocation record
 *
 * Returns the new buffer, or NULL if the underlying allocation failed.
 *
 * The whole operation runs under rf_debug_mem_mutex; the mutex is released
 * on the failure path as well, so a failed allocation cannot leave it held.
 */
char *rf_real_CallocAndAdd(nel, elsz, alist, line, file)
	int nel;
	int elsz;
	RF_AllocListElem_t *alist;
	int line;
	char *file;
{
	int tid, size;
	void *pp;
	char *p;

	size = nel * elsz;
	RF_LOCK_MUTEX(rf_debug_mem_mutex);
	rf_redzone_calloc(pp, nel, elsz);
	p = pp;
	if (p == NULL) {
		RF_ERRORMSG4("Unable to calloc %d objs of size %d at line %d file %s\n",
		    nel, elsz, line, file);
		/* do not return with the debug-memory mutex still held */
		RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
		return (NULL);
	}
	if (rf_memAmtDebug) {
		rf_mem_alloc += size;
		printf("%lu size %d %s:%d\n", rf_mem_alloc, size, file, line);
	}
	if (rf_memDebug > 1) {
		rf_get_threadid(tid);
		/* %lx takes an unsigned long, so convert the pointers explicitly */
		printf("[%d] calloc+add 0x%lx - 0x%lx (%d,%d) %s %d\n", tid,
		    (unsigned long) p, (unsigned long) (p + size), nel, elsz,
		    file, line);
	}
	if (alist) {
		rf_real_AddToAllocList(alist, pp, size, 0);
	}
	if (rf_memDebug)
		rf_record_malloc(p, size, line, file);
	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
	return (p);
}
#endif /* RF_UTILITY == 0 */
/*
 * User-level backend for the RF_Free() macro.
 *
 * p           buffer being released
 * sz          size the caller believes the buffer has; forwarded to
 *             rf_unrecord_malloc() when rf_memDebug is set
 * line, file  call site, used only in the debug output
 *
 * The optional trace line is printed before rf_debug_mem_mutex is taken;
 * the byte accounting, the table update and the actual free all happen
 * with the mutex held.
 */
void rf_real_Free(p, sz, line, file)
	void *p;
	int sz;
	int line;
	char *file;
{
	int tid;

#if RF_UTILITY == 0
	if (rf_memDebug > 1) {
		char *first = p;

		rf_get_threadid(tid);
		printf("[%d] free 0x%lx - 0x%lx (%d) %s %d\n", tid, p, first + sz, sz,
		    file, line);
	}
#endif /* RF_UTILITY == 0 */

	RF_LOCK_MUTEX(rf_debug_mem_mutex);

	if (rf_memAmtDebug) {
		rf_mem_alloc -= sz;
		printf("%lu - size %d %s:%d\n", rf_mem_alloc, sz, file, line);
	}

	if (rf_memDebug)
		rf_unrecord_malloc(p, sz);

	rf_redzone_free(p);

	RF_UNLOCK_MUTEX(rf_debug_mem_mutex);
}
/*
 * Redzone scan over the allocation table: for every entry still marked
 * allocated, read the size stored 8 bytes before the recorded address and
 * check the '!' marker bytes at offsets size+8 and size+15 of that raw
 * buffer, calling rf_redzone_free_failed() on a mismatch.
 *
 * As written the function returns unconditionally before the loop, so the
 * scan is never executed and every call is a no-op.
 */
void rf_validate_mh_table()
{
int i, size;
struct mh_struct *p;
char *cp;
return; /* everything below this line is unreachable */
for (i=0; i<RF_MH_TABLESIZE; i++) {
for (p=mh_table[i]; p; p=p->next) if (p->allocated) {
cp = ((char *) p->address) - 8; /* step back to the start of the raw buffer */
size = *((long *) cp); /* size header stored at the start of the raw buffer */
if ((((char *) cp)[(size)+8] != '!') || (((char *) cp)[(size)+15] != '!')) {
rf_redzone_free_failed(cp,(size),__LINE__,__FILE__);
}
}
}
}
/*
 * Report a damaged redzone: prints the raw buffer address, the size that was
 * recorded for it and the line/file passed in, then trips RF_ASSERT(0).
 */
static void rf_redzone_free_failed(ptr,size,line,file)
	void *ptr;
	int size;
	int line;
	char *file;
{
	RF_ERRORMSG4(
	    "Free of 0x%lx (recorded size %d) at %d of %s detected redzone overrun\n",
	    ptr, size, line, file);

	RF_ASSERT(0);
}
#endif /* !KERNEL */
void rf_record_malloc(p, size, line, filen)
void *p;
int size, line;
char *filen;
{
RF_ASSERT(size != 0);
/*RF_LOCK_MUTEX(rf_debug_mem_mutex);*/
memory_hash_insert(p, size, line, filen);
tot_mem_in_use += size;
/*RF_UNLOCK_MUTEX(rf_debug_mem_mutex);*/
if ( (long) p == rf_memDebugAddress) {
printf("Allocate: debug address allocated from line %d file %s\n",line,filen);
}
}
/*
 * Mark an allocation as freed in the tracking table and subtract its
 * recorded size from tot_mem_in_use.
 *
 * p   address being freed
 * sz  size supplied by the caller; checked by memory_hash_remove()
 *
 * No locking is done here.
 */
void rf_unrecord_malloc(p, sz)
	void *p;
	int sz;
{
	/* memory_hash_remove() returns the size stored in the table entry */
	tot_mem_in_use -= memory_hash_remove(p, sz);

	if ((long) p == rf_memDebugAddress) {
		/* this is really only a flag line for gdb */
		printf("Free: Found debug address\n");
	}
}
/*
 * Print every table entry that is still marked allocated, preceded by a
 * one-time banner, followed by the total byte count if it is non-zero.
 */
void rf_print_unfreed()
{
	struct mh_struct *ent;
	int bucket;
	int banner_done = 0;

	for (bucket = 0; bucket < RF_MH_TABLESIZE; bucket++) {
		for (ent = mh_table[bucket]; ent != NULL; ent = ent->next) {
			if (!ent->allocated)
				continue;
			if (!banner_done) {
				printf("\n\nThere are unfreed memory locations at program shutdown:\n");
				banner_done = 1;
			}
			printf("Addr 0x%lx Size %d line %d file %s\n",
			    (long) ent->address, ent->size, ent->line, ent->filen);
		}
	}

	if (tot_mem_in_use != 0) {
		printf("%ld total bytes in use\n", tot_mem_in_use);
	}
}
/*
 * Set up the debug-memory module: create rf_debug_mem_mutex on the given
 * shutdown list and, when rf_memDebug is set, clear the allocation table
 * and mark it initialized.
 *
 * Returns 0 on success, or the non-zero code from rf_create_managed_mutex().
 */
int rf_ConfigureDebugMem(listp)
	RF_ShutdownList_t **listp;
{
	int rc;
	int bucket;

	rc = rf_create_managed_mutex(listp, &rf_debug_mem_mutex);
	if (rc != 0) {
		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
		    __LINE__, rc);
		return (rc);
	}

	/* the table is only touched when memory debugging is enabled */
	if (!rf_memDebug)
		return (0);

	for (bucket = 0; bucket < RF_MH_TABLESIZE; bucket++)
		mh_table[bucket] = NULL;
	mh_table_initialized = 1;

	return (0);
}
#define HASHADDR(_a_) ( (((unsigned long) _a_)>>3) % RF_MH_TABLESIZE )
/*
 * Record that addr was allocated with the given size at line/filen.
 *
 * If addr has no table entry yet, a new one is allocated and pushed on the
 * front of its bucket.  An entry that is still marked allocated is reported
 * as a reallocation without an intervening free, and RF_ASSERT(0) trips.
 * Otherwise the entry is (re)filled and marked allocated.
 *
 * NOTE(review): in the KERNEL build the entry comes from RF_Malloc(), which
 * itself calls rf_record_malloc() when rf_memDebug is set and so re-enters
 * this function for the new entry - confirm that this cannot recurse
 * without bound.
 */
static void memory_hash_insert(addr, size, line, filen)
	void *addr;
	int size, line;
	char *filen;
{
	unsigned long bucket = HASHADDR(addr);
	struct mh_struct *ent;

	RF_ASSERT(mh_table_initialized);

	/* search for this address in the hash table */
	ent = mh_table[bucket];
	while (ent != NULL && ent->address != addr)
		ent = ent->next;

	if (ent == NULL) {
#ifdef KERNEL
		RF_Malloc(ent,sizeof(struct mh_struct),(struct mh_struct *));
#else /* KERNEL */
		ent = (struct mh_struct *)malloc(sizeof(struct mh_struct));
#endif /* KERNEL */
		RF_ASSERT(ent);
		ent->next = mh_table[bucket];
		mh_table[bucket] = ent;
		ent->address = addr;
		ent->allocated = 0;
	}

	if (ent->allocated) {
		printf("ERROR: reallocated address 0x%lx from line %d, file %s without intervening free\n",(long) addr, line, filen);
		printf(" last allocated from line %d file %s\n",ent->line, ent->filen);
		RF_ASSERT(0);
	}

	ent->size = size;
	ent->line = line;
	ent->filen = filen;
	ent->allocated = 1;
}
static int memory_hash_remove(addr, sz)
void *addr;
int sz;
{
unsigned long bucket = HASHADDR(addr);
struct mh_struct *p;
RF_ASSERT(mh_table_initialized);
for (p=mh_table[bucket]; p && (p->address != addr); p=p->next);
if (!p) {
printf("ERROR: freeing never-allocated address 0x%lx\n",(long) addr);
RF_PANIC();
}
if (!p->allocated) {
printf("ERROR: freeing unallocated address 0x%lx. Last allocation line %d file %s\n",(long) addr, p->line, p->filen);
RF_PANIC();
}
if (sz > 0 && p->size != sz) { /* you can suppress this error by using a negative value as the size to free */
printf("ERROR: incorrect size at free for address 0x%lx: is %d should be %d. Alloc at line %d of file %s\n",(unsigned long) addr, sz, p->size,p->line, p->filen);
RF_PANIC();
}
p->allocated = 0;
return(p->size);
}
/*
 * Print the value of max_mem.  Outside the kernel the line is written to
 * both stdout and stderr, flushing each stream.
 *
 * max_mem is a long but the message uses %d, so the value is converted to
 * int for both calls; passing the long directly to %d would be a
 * format/argument mismatch.
 */
void rf_ReportMaxMem()
{
	printf("Max memory used: %d bytes\n",(int)max_mem);
#ifndef KERNEL
	fflush(stdout);
	fprintf(stderr,"Max memory used: %d bytes\n",(int)max_mem);
	fflush(stderr);
#endif /* !KERNEL */
}

View File

@ -0,0 +1,262 @@
/* $NetBSD: rf_debugMem.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky, Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_debugMem.h -- memory leak debugging module
*
* IMPORTANT: if you put the lock/unlock mutex stuff back in here, you
* need to take it out of the routines in debugMem.c
*
* Log: rf_debugMem.h,v
* Revision 1.27 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.26 1996/06/11 13:46:43 jimz
* make bracing consistent around memory allocation macros
*
* Revision 1.25 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.24 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.23 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.22 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.21 1996/05/23 22:17:40 jimz
* fix alloclist macro names for kernel
*
* Revision 1.20 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.19 1996/05/23 13:18:23 jimz
* include rf_options.h
*
* Revision 1.18 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.17 1996/05/21 18:51:54 jimz
* cleaned up macro args
*
* Revision 1.16 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.15 1996/05/01 16:26:22 jimz
* get rid of old ccmn stuff
*
* Revision 1.14 1995/12/01 15:58:09 root
* added copyright info
*
* Revision 1.13 1995/10/11 15:26:03 jimz
* zero memory after allocation in kernel (hide effects
* of uninitialized structs)
*
* Revision 1.12 1995/10/06 17:04:15 jimz
* make Malloc and Free in kernel use kernel malloc package, not cam
* dbufs (which is gross, and was exhausting cam zalloc limit)
*
* Revision 1.11 1995/05/01 13:28:00 holland
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
*
* Revision 1.10 1995/04/24 13:25:51 holland
* rewrite to move disk queues, recon, & atomic RMW to kernel
*
* Revision 1.9 1995/02/17 19:39:56 holland
* added size param to all calls to Free().
* this is ignored at user level, but necessary in the kernel.
*
* Revision 1.8 1995/02/10 17:34:10 holland
* kernelization changes
*
* Revision 1.7 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.6 1995/02/01 15:13:05 holland
* moved #include of general.h out of raid.h and into each file
*
* Revision 1.5 1995/02/01 14:25:19 holland
* began changes for kernelization:
* changed all instances of mutex_t and cond_t to DECLARE macros
* converted configuration code to use config structure
*
* Revision 1.4 1995/01/11 19:27:02 holland
* many changes related to performance tuning
*
* Revision 1.3 1994/11/29 21:34:56 danner
* Changed type of redzone_calloc and malloc to void *.
*
* Revision 1.2 1994/11/28 22:13:23 danner
* Many macros converted to functions.
*
*/
#ifndef _RF__RF_DEBUGMEM_H_
#define _RF__RF_DEBUGMEM_H_
#include "rf_archs.h"
#include "rf_alloclist.h"
#include "rf_options.h"
#ifndef KERNEL
#ifndef __NetBSD__
void *malloc(), *calloc();
#endif
RF_DECLARE_EXTERN_MUTEX(rf_debug_mem_mutex)
/*
 * redzone malloc, calloc, and free allocate an extra 16 bytes on each
 * malloc/calloc call to allow tracking of overflows on free.
 *
 * With RF_MEMORY_REDZONES > 0 the rf_redzone_* macros forward to the
 * rf_real_redzone_* functions; otherwise they expand to plain
 * malloc/calloc/free.  In both variants the malloc and calloc forms assign
 * the result to their first argument, and the redzone free form passes the
 * __LINE__/__FILE__ of the place where the macro is expanded.
 */
#if RF_MEMORY_REDZONES > 0
#define rf_redzone_malloc(_p_,_size_) _p_ = rf_real_redzone_malloc(_size_)
#define rf_redzone_calloc(_p_,_n_,_size_) _p_ = rf_real_redzone_calloc(_n_,_size_)
#define rf_redzone_free(_p_) rf_real_redzone_free(_p_, __LINE__, __FILE__)
#else /* RF_MEMORY_REDZONES > 0 */
#define rf_redzone_malloc(_p_,_size_) _p_ = malloc(_size_)
#define rf_redzone_calloc(_p_,_nel_,_size_) _p_ = calloc(_nel_,_size_)
#define rf_redzone_free(_ptr_) free(_ptr_)
#endif /* RF_MEMORY_REDZONES > 0 */
/*
 * User-level allocation macros.  Each expands to a braced block that
 * forwards to the matching rf_real_* routine together with the call site's
 * __LINE__ and __FILE__.  _p_ receives the result; _cast_ is pasted directly
 * in front of the call, so it must be a parenthesized pointer type such as
 * (struct foo *).  The *AndAdd forms additionally take the allocation list
 * to register the buffer on.
 */
#define RF_Malloc(_p_, _size_, _cast_) { \
_p_ = _cast_ rf_real_Malloc(_size_, __LINE__, __FILE__); \
}
#define RF_MallocAndAdd(_p_, _size_, _cast_, _alist_) { \
_p_ = _cast_ rf_real_MallocAndAdd(_size_, _alist_, __LINE__, __FILE__); \
}
#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) { \
_p_ = _cast_ rf_real_Calloc(_nel_, _elsz_, __LINE__, __FILE__); \
}
#define RF_CallocAndAdd(_p_, _nel_, _elsz_, _cast_, _alist_) { \
_p_ = _cast_ rf_real_CallocAndAdd(_nel_, _elsz_, _alist_, __LINE__, __FILE__); \
}
/* Release __p_; _sz_ is forwarded to rf_real_Free() along with the call site. */
#define RF_Free(__p_, _sz_) { \
rf_real_Free(__p_, _sz_, __LINE__, __FILE__); \
}
#else /* KERNEL */
#include <sys/types.h>
#ifdef __NetBSD__
typedef u_int32_t U32;
#else
#include <io/common/iotypes.h> /* just to get defn of U32 */
#endif /* __NetBSD__ */
#include <sys/malloc.h>
/*
 * Kernel allocation macros.  Each expands to a braced block built directly
 * on the kernel malloc()/free() with type M_DEVBUF.  Macro arguments are
 * expanded more than once, so they should not have side effects.
 *
 * RF_Malloc: allocate _size_ bytes with M_WAITOK, zero them, and record the
 * allocation through rf_record_malloc() when rf_memDebug is set.  The two
 * variants differ only in the argument list passed to malloc().
 */
#ifdef __NetBSD__
#define RF_Malloc(_p_, _size_, _cast_) \
{ \
_p_ = _cast_ malloc((u_long)_size_, M_DEVBUF, M_WAITOK); \
bzero((char *)_p_, _size_); \
if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \
}
#else
#define RF_Malloc(_p_, _size_, _cast_) \
{ \
_p_ = _cast_ malloc((u_long)_size_, BUCKETINDEX(_size_), M_DEVBUF, M_WAITOK); \
bzero((char *)_p_, _size_); \
if (rf_memDebug) rf_record_malloc(_p_, _size_, __LINE__, __FILE__); \
}
#endif /* __NetBSD__ */
/* RF_Malloc, then add the buffer to __alist_ when that list is non-NULL. */
#define RF_MallocAndAdd(__p_, __size_, __cast_, __alist_) \
{ \
RF_Malloc(__p_, __size_, __cast_); \
if (__alist_) rf_AddToAllocList(__alist_, __p_, __size_); \
}
/*
 * RF_Malloc of _nel_ * _elsz_ bytes followed by a bzero of the same range
 * (RF_Malloc has already zeroed the buffer once).
 */
#define RF_Calloc(_p_, _nel_, _elsz_, _cast_) \
{ \
RF_Malloc( _p_, (_nel_) * (_elsz_), _cast_); \
bzero( (_p_), (_nel_) * (_elsz_) ); \
}
/* RF_Calloc, then add the buffer to __alist when that list is non-NULL. */
#define RF_CallocAndAdd(__p,__nel,__elsz,__cast,__alist) \
{ \
RF_Calloc(__p, __nel, __elsz, __cast); \
if (__alist) rf_AddToAllocList(__alist, __p, (__nel)*(__elsz)); \
}
/*
 * Free _p_ and, when rf_memDebug is set, drop it from the allocation table.
 * The table update runs after the free and uses only the pointer value.
 */
#define RF_Free(_p_, _sz_) \
{ \
free((void *)(_p_), M_DEVBUF); \
if (rf_memDebug) rf_unrecord_malloc(_p_, (U32) (_sz_)); \
}
#endif /* KERNEL */
#ifndef KERNEL
/* Redzone allocators: 16 bytes of envelope around each buffer (user level only). */
void *rf_real_redzone_malloc(int size);
void *rf_real_redzone_calloc(int n, int size);
void rf_real_redzone_free(char *p, int line, char *filen);
/* Backends for the user-level RF_Malloc / RF_Calloc / RF_Free macros. */
char *rf_real_Malloc(int size, int line, char *file);
char *rf_real_Calloc(int nel, int elsz, int line, char *file);
void rf_real_Free(void *p, int sz, int line, char *file);
void rf_validate_mh_table(void);
#if RF_UTILITY == 0
/* Backends for the *AndAdd macros; only built when RF_UTILITY == 0. */
char *rf_real_MallocAndAdd(int size, RF_AllocListElem_t *alist, int line, char *file);
char *rf_real_CallocAndAdd(int nel, int elsz, RF_AllocListElem_t *alist, int line, char *file);
#endif /* RF_UTILITY == 0 */
#endif /* !KERNEL */
/* Allocation-table bookkeeping, declared for both kernel and user-level builds. */
void rf_record_malloc(void *p, int size, int line, char *filen);
void rf_unrecord_malloc(void *p, int sz);
void rf_print_unfreed(void);
int rf_ConfigureDebugMem(RF_ShutdownList_t **listp);
void rf_ReportMaxMem(void);
#endif /* !_RF__RF_DEBUGMEM_H_ */

View File

@ -0,0 +1,185 @@
/* $NetBSD: rf_debugprint.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Code to do debug printfs. Calls to rf_debug_printf cause the corresponding
* information to be printed to a circular buffer rather than the screen.
* The point is to try and minimize the timing variations induced by the
* printfs, and to capture only the printf's immediately preceding a failure.
*/
/* :
* Log: rf_debugprint.c,v
* Revision 1.13 1996/08/07 21:08:31 jimz
* remove bogus ; from mutex decl
*
* Revision 1.12 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.11 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.10 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.9 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.8 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.7 1996/05/20 16:16:06 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.6 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.5 1995/12/01 16:00:45 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_debugprint.h"
#include "rf_general.h"
#include "rf_options.h"
#include <sys/param.h>
/* One buffered debug message: the format string plus its eight arguments. */
struct RF_Entry_s {
char *cstring; /* format string pointer (stored, not copied); NULL marks an empty slot */
void *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; /* arguments, passed to printf as-is */
};
/* space for 1k lines */
#define BUFSHIFT 10
#define BUFSIZE (1<<BUFSHIFT)
#define BUFMASK (BUFSIZE-1) /* BUFSIZE is a power of two, so this mask wraps an index */
static struct RF_Entry_s rf_debugprint_buf[BUFSIZE];
/* Slot that the next rf_debug_printf() call will fill. */
static int rf_debugprint_index = 0;
RF_DECLARE_STATIC_MUTEX(rf_debug_print_mutex)
/*
 * Set up the debug-print module: create rf_debug_print_mutex on the given
 * shutdown list and empty the message buffer.
 *
 * Returns 0 on success, or the non-zero code from rf_create_managed_mutex().
 */
int rf_ConfigureDebugPrint(listp)
	RF_ShutdownList_t **listp;
{
	int rc = rf_create_managed_mutex(listp, &rf_debug_print_mutex);

	if (rc == 0) {
		rf_clear_debug_print_buffer();
		return (0);
	}

	RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
	    __LINE__, rc);
	return (rc);
}
void rf_clear_debug_print_buffer()
{
int i;
for (i=0; i<BUFSIZE; i++)
rf_debugprint_buf[i].cstring = NULL;
rf_debugprint_index = 0;
}
void rf_debug_printf(s,a1,a2,a3,a4,a5,a6,a7,a8)
char *s;
void *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8;
{
int idx;
if (rf_debugPrintUseBuffer) {
RF_LOCK_MUTEX(rf_debug_print_mutex);
idx = rf_debugprint_index;
rf_debugprint_index = (rf_debugprint_index+1) & BUFMASK;
RF_UNLOCK_MUTEX(rf_debug_print_mutex);
rf_debugprint_buf[idx].cstring = s;
rf_debugprint_buf[idx].a1 = a1;
rf_debugprint_buf[idx].a2 = a2;
rf_debugprint_buf[idx].a3 = a3;
rf_debugprint_buf[idx].a4 = a4;
rf_debugprint_buf[idx].a5 = a5;
rf_debugprint_buf[idx].a6 = a6;
rf_debugprint_buf[idx].a7 = a7;
rf_debugprint_buf[idx].a8 = a8;
}
else {
printf(s,a1,a2,a3,a4,a5,a6,a7,a8);
}
}
/*
 * Dump the buffered debug messages without naming an output file; this is
 * rf_spill_debug_buffer() called with a NULL file name.
 */
void rf_print_debug_buffer()
{
	char *no_file = NULL;

	rf_spill_debug_buffer(no_file);
}
void rf_spill_debug_buffer(fname)
char *fname;
{
int i;
#ifndef KERNEL
FILE *fp;
#endif /* !KERNEL */
if (!rf_debugPrintUseBuffer)
return;
RF_LOCK_MUTEX(rf_debug_print_mutex);
#ifndef KERNEL
fp = (fname) ? fopen(fname,"w") : stdout;
if (!fp) {printf("Unable to open file %s for writing\n",fname); return;}
for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring)
fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
fprintf(fp,rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
fclose(fp);
#else /* !KERNEL */
for (i=rf_debugprint_index+1; i != rf_debugprint_index; i = (i+1)&BUFMASK) if (rf_debugprint_buf[i].cstring)
printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
printf(rf_debugprint_buf[i].cstring,rf_debugprint_buf[i].a1,rf_debugprint_buf[i].a2,rf_debugprint_buf[i].a3,
rf_debugprint_buf[i].a4,rf_debugprint_buf[i].a5,rf_debugprint_buf[i].a6,rf_debugprint_buf[i].a7,rf_debugprint_buf[i].a8);
#endif /* !KERNEL */
RF_UNLOCK_MUTEX(rf_debug_print_mutex);
}

View File

@ -0,0 +1,63 @@
/* $NetBSD: rf_debugprint.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* rf_debugprint.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_debugprint.h,v
* Revision 1.4 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.3 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/18 19:55:43 jimz
* Initial revision
*
*/
#ifndef _RF__RF_DEBUGPRINT_H_
#define _RF__RF_DEBUGPRINT_H_
/* Create the module's mutex on listp and clear the message buffer; returns 0 on success. */
int rf_ConfigureDebugPrint(RF_ShutdownList_t **listp);
/* Mark every buffer slot empty and reset the write index. */
void rf_clear_debug_print_buffer(void);
/*
 * printf-style output with up to eight pointer-sized arguments; the message
 * is either printed immediately or stored in the circular buffer, depending
 * on rf_debugPrintUseBuffer.
 */
void rf_debug_printf(char *s, void *a1, void *a2, void *a3, void *a4,
void *a5, void *a6, void *a7, void *a8);
/* Same as rf_spill_debug_buffer(NULL). */
void rf_print_debug_buffer(void);
/* Write the buffered messages to the named file, or to stdout when fname is NULL (user level). */
void rf_spill_debug_buffer(char *fname);
#endif /* !_RF__RF_DEBUGPRINT_H_ */

View File

@ -0,0 +1,846 @@
/* $NetBSD: rf_decluster.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*----------------------------------------------------------------------
*
* rf_decluster.c -- code related to the declustered layout
*
* Created 10-21-92 (MCH)
*
* Nov 93: adding support for distributed sparing. This code is a little
* complex: the basic layout used is as follows:
* let F = (v-1)/GCD(r,v-1). The spare space for each set of
* F consecutive fulltables is grouped together and placed after
* that set of tables.
* +------------------------------+
* | F fulltables |
* | Spare Space |
* | F fulltables |
* | Spare Space |
* | ... |
* +------------------------------+
*
*--------------------------------------------------------------------*/
/*
* :
* Log: rf_decluster.c,v
* Revision 1.51 1996/08/21 19:47:10 jimz
* fix bogus return values from config
*
* Revision 1.50 1996/08/20 22:41:42 jimz
* better diagnostics for bad blockdesigns
*
* Revision 1.49 1996/07/31 16:56:18 jimz
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
*
* Revision 1.48 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.47 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.46 1996/07/27 18:40:11 jimz
* cleanup sweep
*
* Revision 1.45 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.44 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.43 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.42 1996/06/17 03:23:48 jimz
* switch DeclusteredDS typing
*
* Revision 1.41 1996/06/11 08:55:15 jimz
* improved error-checking at configuration time
*
* Revision 1.40 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.39 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.38 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.37 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.36 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.35 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.34 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.33 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.32 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.31 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.30 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.29 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.28 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.27 1995/12/01 16:00:08 root
* added copyright info
*
* Revision 1.26 1995/11/28 21:35:12 amiri
* set the RF_BD_DECLUSTERED flag
*
* Revision 1.25 1995/11/17 18:56:00 wvcii
* added prototyping to MapParity
*
* Revision 1.24 1995/07/04 22:25:33 holland
* increased default num bufs
*
* Revision 1.23 1995/07/03 20:23:51 holland
* changed floating recon bufs & head sep yet again
*
* Revision 1.22 1995/07/03 18:12:14 holland
* changed the way the number of floating recon bufs & the head sep
* limit are set
*
* Revision 1.21 1995/07/02 15:07:42 holland
* bug fixes related to getting distributed sparing numbers
*
* Revision 1.20 1995/06/23 13:41:28 robby
* updeated to prototypes in rf_layout.h
*
*/
#ifdef _KERNEL
#define KERNEL
#endif
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_raidframe.h"
#include "rf_configure.h"
#include "rf_decluster.h"
#include "rf_debugMem.h"
#include "rf_utils.h"
#include "rf_alloclist.h"
#include "rf_general.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
extern int rf_copyback_in_progress; /* debug only */
/* found in rf_kintf.c */
int rf_GetSpareTableFromDaemon(RF_SparetWait_t *req);
/* configuration code */
/*
 * Configure the declustered-parity layout for one array.
 *
 * The layout-specific configuration buffer (cfgPtr->layoutSpecific) is read as:
 *   RF_SPAREMAP_NAME_LEN bytes   sparemap file name (copied only when the
 *                                layout map has RF_DISTRIBUTE_SPARE set)
 *   six ints                     b, v, k, r, lambda, noRotate
 *   b*k bytes                    the block design table, one disk id per byte
 *
 * On success the RF_DeclusteredConfigInfo_t hung off
 * raidPtr->Layout.layoutSpecificInfo is filled in, the layout/offset/block
 * lookup tables are built, and the per-disk and per-array size fields of
 * raidPtr and its layout are adjusted to hold a whole number of tables.
 *
 * Returns 0 on success, ENOMEM when an allocation fails, and EINVAL when the
 * configuration is inconsistent: v does not match the column count, the block
 * design is degenerate, the design does not fit on a disk, or the design table
 * names a disk outside [0, v) or uses one disk more often than the table depth
 * allows.  The info structure and lookup tables are allocated on
 * raidPtr->cleanupList, so they are not freed here on the error paths.
 *
 * listp is not used by this function.
 */
int rf_ConfigureDeclustered(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  int b, v, k, r, lambda;                /* block design params */
  int i, j;
  RF_RowCol_t *first_avail_slot;
  RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
  RF_DeclusteredConfigInfo_t *info;
  RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
  RF_StripeCount_t totSparePUsPerDisk;
  RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
  RF_SectorCount_t SpareSpaceInSUs;
  char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
  RF_StripeNum_t l, SUID;

  SUID = l = 0;
  numCompleteSpareRegionsPerDisk = 0;

  /* 1. create layout specific structure */
  RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
  if (info == NULL)
    return(ENOMEM);
  layoutPtr->layoutSpecificInfo = (void *) info;
  info->SpareTable = NULL;

  /* 2. extract parameters from the config structure */
  if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
    (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
  }
  /* the name field is skipped whether or not it was copied */
  cfgBuf += RF_SPAREMAP_NAME_LEN;

  b        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
  v        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
  k        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
  r        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
  lambda   = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
  raidPtr->noRotate = *( (int *) cfgBuf);   cfgBuf += sizeof(int);

  /* the sparemaps are generated assuming that parity is rotated, so we issue
   * a warning if both distributed sparing and no-rotate are on at the same time
   */
  if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
    RF_ERRORMSG("Warning:  distributed sparing specified without parity rotation.\n");
  }

  if (raidPtr->numCol != v) {
    RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
    return(EINVAL);
  }

  /* The computations below divide by v, v-1, k, k-1, rf_gcd(r, v-1) and the
   * table depth (b*k)/v.  Reject designs for which any of these would be
   * zero or negative instead of faulting on the division.
   */
  if (b < 1 || v < 2 || k < 2 || r < 1 || (b*k)/v < 1) {
    RF_ERRORMSG3("RAID: config error: degenerate block design (b=%d, v=%d, k=%d)\n", b, v, k);
    return(EINVAL);
  }

  /* 3.  set up the values used in the mapping code */
  info->BlocksPerTable = b;
  info->Lambda = lambda;
  info->NumParityReps = info->groupSize = k;
  info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;      /* b blks, k-1 SUs each */
  info->SUsPerFullTable = k * info->SUsPerTable;            /* rot k times */
  info->PUsPerBlock = k-1;
  info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
  info->TableDepthInPUs = (b*k) / v;
  info->FullTableDepthInPUs = info->TableDepthInPUs * k;    /* k repetitions */

  /* used only in distributed sparing case */
  info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1);  /* (v-1)/gcd fulltables */
  info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
  info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;

  /* check to make sure the block design is sufficiently small */
  if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
    if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
      RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
                   (int)info->FullTableDepthInPUs,
                   (int)info->SpareSpaceDepthPerRegionInSUs,
                   (int)layoutPtr->stripeUnitsPerDisk);
      return(EINVAL);
    }
  } else {
    if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
      RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
                   (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU),
                   (int)layoutPtr->stripeUnitsPerDisk);
      return(EINVAL);
    }
  }

  /* compute the size of each disk, and the number of tables in the last fulltable (which
   * need not be complete)
   */
  if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {

    PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
    spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
                             (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
    info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;

    numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
    info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
    extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;

    /* assume conservatively that we need the full amount of spare space in one region in order
     * to provide spares for the partial spare region at the end of the array.  We set "i" to
     * the number of tables in the partial spare region.  This may actually include some fulltables.
     */
    extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
    if (extraPUsPerDisk <= 0) i = 0;
    else i = extraPUsPerDisk/info->TableDepthInPUs;

    complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
    info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
    info->ExtraTablesPerDisk = i % k;

    /* note that in the last spare region, the spare space is complete even though data/parity space is not */
    totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
    info->TotSparePUsPerDisk = totSparePUsPerDisk;

    layoutPtr->stripeUnitsPerDisk =
      ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs +    /* data & parity space */
       info->ExtraTablesPerDisk * info->TableDepthInPUs +
       totSparePUsPerDisk                                                   /* spare space */
      ) * layoutPtr->SUsPerPU;
    layoutPtr->dataStripeUnitsPerDisk =
      (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
      * layoutPtr->SUsPerPU * (k-1) / k;

  } else {
    /* non-dist spare case:  force each disk to contain an integral number of tables */
    layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
    layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);

    /* compute the number of tables in the last fulltable, which need not be complete */
    complete_FT_count =
      ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;

    info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
    info->ExtraTablesPerDisk =
      ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
  }

  raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;

  /* find the disk offset of the stripe unit where the last fulltable starts */
  numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
  diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
    SpareSpaceInSUs  = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
    diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
    info->DiskOffsetOfLastSpareSpaceChunkInSUs =
      diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  }
  info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
  info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;

  /* 4.  create and initialize the lookup tables */
  info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  if (info->LayoutTable == NULL)
    return(ENOMEM);
  info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  if (info->OffsetTable == NULL)
    return(ENOMEM);
  info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
  if (info->BlockTable == NULL)
    return(ENOMEM);

  /* scratch array, one counter per disk; not on the cleanup list, so every
   * exit below this point has to free it */
  first_avail_slot = rf_make_1d_array(v, NULL);
  if (first_avail_slot == NULL)
    return(ENOMEM);

  for (i=0; i<b; i++)
    for (j=0; j<k; j++)
      info->LayoutTable[i][j] = *cfgBuf++;

  /* initialize offset table */
  for (i=0; i<b; i++) for (j=0; j<k; j++) {
    RF_RowCol_t disk = info->LayoutTable[i][j];

    /* The disk id indexes first_avail_slot (v entries) and the columns of
     * BlockTable; the resulting offset indexes the rows of BlockTable, which
     * has TableDepthInPUs*SUsPerPU of them.  Refuse tables that would run
     * off either array.
     */
    if (disk < 0 || disk >= v || first_avail_slot[disk] >= info->TableDepthInPUs) {
      RF_ERRORMSG3("RAID: config error: bad block design table entry (%d) in block %d, position %d\n", (int)disk, i, j);
      rf_free_1d_array(first_avail_slot, v);
      return(EINVAL);
    }
    info->OffsetTable[i][j] = first_avail_slot[disk];
    first_avail_slot[disk]++;
  }

  /* initialize block table */
  for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
    for (i=0; i<b; i++) {
      for (j=0; j<k; j++) {
        info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
                        [ info->LayoutTable[i][j] ] = SUID;
      }
      SUID++;
    }
  }

  rf_free_1d_array(first_avail_slot, v);

  /* 5.  set up the remaining redundant-but-useful parameters */

  raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
                          info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
  layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1);

  /* strange evaluation order below to try and minimize overflow problems */

  layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit;
  layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
  layoutPtr->numDataCol = k-1;
  layoutPtr->numParityCol = 1;

  return(0);
}
/* declustering with distributed sparing */
static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);

/*
 * Shutdown callback registered by rf_ConfigureDeclusteredDS().
 * arg is the RF_Raid_t of the array; if a spare table is currently
 * installed for it, the table is released via rf_FreeSpareTable().
 */
static void rf_ShutdownDeclusteredDS(arg)
  RF_ThreadArg_t  arg;
{
  RF_Raid_t *array = (RF_Raid_t *) arg;
  RF_DeclusteredConfigInfo_t *cfg =
    (RF_DeclusteredConfigInfo_t *) array->Layout.layoutSpecificInfo;

  /* nothing was installed, nothing to release */
  if (!cfg->SpareTable)
    return;

  rf_FreeSpareTable(array);
}
/*
 * Configure declustering with distributed sparing: run the common
 * declustered configuration, then register a shutdown event that releases
 * the spare table.  Returns 0 on success, otherwise the nonzero code from
 * whichever step failed.
 */
int rf_ConfigureDeclusteredDS(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  int status;

  status = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
  if (status != 0)
    return(status);

  status = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
  if (status == 0)
    return(0);

  /* could not register the event: report it and run the cleanup by hand */
  RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", status);
  rf_ShutdownDeclusteredDS(raidPtr);
  return(status);
}
void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
RF_Raid_t *raidPtr;
RF_RaidAddr_t raidSector;
RF_RowCol_t *row;
RF_RowCol_t *col;
RF_SectorNum_t *diskSector;
int remap;
{
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
RF_StripeNum_t BlockID, BlockOffset, RepIndex;
RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array (across rows) */
if (raidPtr->numRow == 1) *row = 0; /* avoid a mod and a div in the common case */
else {
*row = FullTableID % raidPtr->numRow;
FullTableID /= raidPtr->numRow; /* convert to fulltable ID on this disk */
}
if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
}
FullTableOffset = SUID % sus_per_fulltable;
TableID = FullTableOffset / info->SUsPerTable;
TableOffset = FullTableOffset - TableID * info->SUsPerTable;
BlockID = TableOffset / info->PUsPerBlock;
BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
BlockID %= info->BlocksPerTable;
RepIndex = info->PUsPerBlock - TableID;
if (!raidPtr->noRotate) BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
*col = info->LayoutTable[BlockID][BlockOffset];
/* remap to distributed spare space if indicated */
if (remap) {
RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
(rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
} else {
outSU = base_suid;
outSU += FullTableID * fulltable_depth; /* offs to strt of FT */
outSU += SpareSpace; /* skip rsvd spare space */
outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */
}
outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock); /* offs to the SU within a PU */
/* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector. */
*diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
RF_ASSERT( *col != -1 );
}
/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
/*
 * Map a RAID sector address to the (row, col, diskSector) of the parity unit
 * protecting it under the declustered layout.
 *
 * The decomposition of the stripe unit number is the same as in
 * rf_MapSectorDeclustered(); the difference is that the column and disk
 * offset are looked up at the parity position (RepIndex) of the block rather
 * than at the data unit's own position.  With noRotate set, parity always
 * sits in the last position of the block.  When remap is nonzero the access
 * is redirected to distributed spare space.
 */
void rf_MapParityDeclustered(
  RF_Raid_t       *raidPtr,
  RF_RaidAddr_t    raidSector,
  RF_RowCol_t     *row,
  RF_RowCol_t     *col,
  RF_SectorNum_t  *diskSector,
  int              remap)
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
  RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
  RF_StripeNum_t BlockID, RepIndex;
  RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;

  rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

  /* compute row & (possibly) spare space exactly as in rf_MapSectorDeclustered */
  FullTableID = SUID / sus_per_fulltable;
  if (raidPtr->numRow == 1) {
    *row = 0;                               /* avoid a mod and a div in the common case */
  } else {
    *row = FullTableID % raidPtr->numRow;
    FullTableID /= raidPtr->numRow;         /* convert to fulltable ID on this disk */
  }
  if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
    SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
    SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
  }

  /* compute BlockID exactly as in rf_MapSectorDeclustered; the offset of the
   * addressed unit inside its block is not needed for parity */
  FullTableOffset = SUID % sus_per_fulltable;
  TableID         = FullTableOffset / info->SUsPerTable;
  TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
  BlockID         = TableOffset / info->PUsPerBlock;
  BlockID        %= info->BlocksPerTable;

  /* the parity block is in the position indicated by RepIndex */
  RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
  *col = info->LayoutTable[BlockID][RepIndex];

  if (remap) {
    RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
               (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
    rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
  } else {
    /* same sum as for a data unit, except that RepIndex selects the PU */
    outSU  = base_suid;
    outSU += FullTableID * fulltable_depth;                         /* offs to strt of FT */
    outSU += SpareSpace;                                            /* skip rsvd spare space */
    outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
    outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;  /* offs to the PU */
  }

  outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);  /* offs to the SU within a PU */
  *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);

  RF_ASSERT( *col != -1 );
}
/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
* the caller must _never_ attempt to modify this array.
*/
/*
 * Report which disks make up the stripe containing addr.
 * *diskids is pointed at a row of the block design table itself (not a
 * copy), and *outRow receives the array row of the stripe.
 */
void rf_IdentifyStripeDeclustered(
  RF_Raid_t     *raidPtr,
  RF_RaidAddr_t  addr,
  RF_RowCol_t  **diskids,
  RF_RowCol_t   *outRow)
{
  RF_RaidLayout_t *lay = &(raidPtr->Layout);
  RF_DeclusteredConfigInfo_t *dc = (RF_DeclusteredConfigInfo_t *) lay->layoutSpecificInfo;
  RF_StripeNum_t su = rf_RaidAddressToStripeUnitID(lay, addr);
  RF_StripeCount_t susPerFT = dc->SUsPerFullTable;
  RF_StripeCount_t ftDepth = dc->FullTableDepthInPUs * lay->SUsPerPU;
  RF_StripeNum_t baseSU = 0;
  RF_StripeNum_t stripe, ftID;
  int designRow;

  /* only su and susPerFT are consumed below; the other outputs are ignored */
  rf_decluster_adjust_params(lay, &su, &susPerFT, &ftDepth, &baseSU);

  /* fulltable ID within array (across rows) selects the row */
  ftID = su / susPerFT;
  *outRow = ftID % raidPtr->numRow;

  /* stripe offset into array, reduced to an index into the block design table */
  stripe = rf_StripeUnitIDToStripeID(lay, su);
  designRow = (stripe % dc->BlocksPerTable);

  *diskids = dc->LayoutTable[designRow];
}
/* This returns the default head-separation limit, which is measured
* in "required units for reconstruction". Each time a disk fetches
* a unit, it bumps a counter. The head-sep code prohibits any disk
* from getting more than headSepLimit counter values ahead of any
* other.
*
* We assume here that the number of floating recon buffers is already
* set. There are r stripes to be reconstructed in each table, and so
* if we have a total of B buffers, we can have at most B/r tables
* under recon at any one time. In each table, lambda units are required
* from each disk, so given B buffers, the head sep limit has to be
* (lambda*B)/r units. (This comment used to say that one is subtracted to
* avoid weird boundary cases; the code below performs no such subtraction.)
*
* For example, suppose we're given 50 buffers, r=19, and lambda=4 as in
* the 20.5 design. There are 19 stripes/table to be reconstructed, so
* we can have 50/19 tables concurrently under reconstruction, which means
* we can allow the fastest disk to get 50/19 tables ahead of the slower
* disk. There are lambda "required units" for each disk, so the fastest
* disk can get 4*50/19 = 10 counter values ahead of the slowest.
*
* If numBufsToAccumulate is not 1, we need to limit the head sep further
* because multiple bufs will be required for each stripe under recon.
*/
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(
  RF_Raid_t  *raidPtr)
{
  RF_RaidLayout_t *lay = &raidPtr->Layout;
  RF_DeclusteredConfigInfo_t *dc = (RF_DeclusteredConfigInfo_t *) lay->layoutSpecificInfo;
  RF_HeadSepLimit_t limit;

  /* lambda * (floating recon bufs) / (table depth in PUs), scaled down by
   * the number of buffers accumulated per stripe under recon */
  limit = dc->Lambda * raidPtr->numFloatingReconBufs / dc->TableDepthInPUs / rf_numBufsToAccumulate;
  return(limit);
}
/* returns the default number of recon buffers to use. The value
* is somewhat arbitrary...it's intended to be large enough to allow
* for a reasonably large head-sep limit, but small enough that you
* don't use up all your system memory with buffers.
*/
int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
{
  /* 100 buffers for each buffer accumulated per stripe; raidPtr is not consulted */
  int defaultBufs = 100 * rf_numBufsToAccumulate;

  return(defaultBufs);
}
/* sectors in the last fulltable of the array need to be handled
* specially since this fulltable can be incomplete. this function
* changes the values of certain params to handle this.
*
* the idea here is that MapSector et al. figure out which disk the
* addressed unit lives on by computing the modulos of the unit number
* with the number of units per fulltable, table, etc. In the last
* fulltable, there are fewer units per fulltable, so we need to adjust
* the number of user data units per fulltable to reflect this.
*
* so, we (1) convert the fulltable size and depth parameters to
* the size of the partial fulltable at the end, (2) compute the
* disk sector offset where this fulltable starts, and (3) convert
* the user's stripe unit number from an offset into the array to
* an offset into the last fulltable.
*/
/*
 * Adjust the mapping parameters for stripe units that fall in the last,
 * possibly partial, fulltable of the array.
 *
 * If *SUID is below info->FullTableLimitSUID nothing is changed.  Otherwise:
 *   *sus_per_fulltable  becomes the number of SUs in the partial fulltable
 *   *fulltable_depth    becomes its depth on disk, in SUs
 *   *base_suid          becomes the disk offset (in SUs) where it starts
 *   *SUID               is rebased to be an offset into the partial fulltable
 */
void rf_decluster_adjust_params(
  RF_RaidLayout_t   *layoutPtr,
  RF_StripeNum_t    *SUID,
  RF_StripeCount_t  *sus_per_fulltable,
  RF_StripeCount_t  *fulltable_depth,
  RF_StripeNum_t    *base_suid)
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;

  if (*SUID >= info->FullTableLimitSUID) {
    /* new full table size is size of last full table on disk */
    *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;

    /* new full table depth is corresponding depth */
    *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;

    /* set up the new base offset */
    *base_suid = info->DiskOffsetOfLastFullTableInSUs;

    /* convert the user's array address to an offset into the last fulltable */
    *SUID -= info->FullTableLimitSUID;
  }
}
/*
* map a stripe ID to a parity stripe ID.
* See comment above RaidAddressToParityStripeID in layout.c.
*/
/*
 * Translate a stripe ID into the ID of the parity stripe holding it (*psID)
 * and the index of the reconstruction unit within that parity stripe
 * (*which_ru).
 */
void rf_MapSIDToPSIDDeclustered(
  RF_RaidLayout_t    *layoutPtr,
  RF_StripeNum_t      stripeID,
  RF_StripeNum_t     *psID,
  RF_ReconUnitNum_t  *which_ru)
{
  RF_DeclusteredConfigInfo_t *dc = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  RF_StripeCount_t nblocks = dc->BlocksPerTable;

  /* whole groups of (SUsPerPU * nblocks) stripes collapse to nblocks parity
   * stripes; the position within a table is kept */
  *psID = (stripeID / (layoutPtr->SUsPerPU * nblocks)) * nblocks
          + (stripeID % nblocks);

  /* which pass over the table, within the group, this stripe belongs to */
  *which_ru = (stripeID % (nblocks * layoutPtr->SUsPerPU)) / nblocks;

  RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU);
}
/*
* Called from MapSector and MapParity to retarget an access at the spare unit.
* Modifies the "col" and "outSU" parameters only.
*/
/*
 * Redirect an access to its spare unit.  Looks up the spare table entry for
 * (table within spare region, BlockID) and writes the spare disk to *outCol
 * and the disk stripe unit offset of the spare unit to *outSU.  Nothing else
 * is modified; row is not used.
 *
 * base_suid is nonzero when the caller's parameters were altered by
 * rf_decluster_adjust_params(), i.e. the access lies in the last, partial
 * fulltable.
 */
void rf_remap_to_spare_space(
  RF_RaidLayout_t             *layoutPtr,
  RF_DeclusteredConfigInfo_t  *info,
  RF_RowCol_t                  row,
  RF_StripeNum_t               FullTableID,
  RF_StripeNum_t               TableID,
  RF_SectorNum_t               BlockID,
  RF_StripeNum_t               base_suid,
  RF_StripeNum_t               SpareRegion,
  RF_RowCol_t                 *outCol,
  RF_StripeNum_t              *outSU)
{
  RF_StripeNum_t realFT, spareStart, tblInRegion, partialSRStart, ftInPartialSR;
  RF_SpareTableEntry_t *entry;

  if (base_suid != 0) {
    /*
     * FullTableID (and therefore SpareRegion) was computed relative to the
     * partial fulltable.  There may be more than one full table in the
     * last (i.e. partial) spare region, so work out which one this is.
     */
    partialSRStart = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
    ftInPartialSR = (info->DiskOffsetOfLastFullTableInSUs - partialSRStart) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);

    /* compute the actual full table ID */
    realFT = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + ftInPartialSR;
    SpareRegion = info->NumCompleteSRs;
  } else {
    realFT = FullTableID;
  }

  tblInRegion = (realFT * info->NumParityReps + TableID) % info->TablesPerSpareRegion;
  entry = &info->SpareTable[tblInRegion][BlockID];

  *outCol = entry->spareDisk;
  RF_ASSERT( *outCol != -1);

  /* spare space of the partial region follows the extra tables; in a
   * complete region it occupies the tail of the region */
  spareStart = (SpareRegion == info->NumCompleteSRs) ?
    info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
    (SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;

  *outSU = spareStart + entry->spareBlockOffsetInSUs;
  if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
    printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU);
  }
}
/*
 * Obtain the spare table for a failed disk and install it in the
 * declustered layout info.
 *
 * A request describing the array geometry and the failed column is built.
 * Outside the kernel the table is read directly with rf_ReadSpareTable()
 * from info->sparemap_fname and the request is freed here; in the kernel the
 * request is handed to rf_GetSpareTableFromDaemon().
 * NOTE(review): the kernel path does not free req here; presumably ownership
 * passes to rf_GetSpareTableFromDaemon() -- confirm.
 *
 * Returns 0 on success and nonzero on failure (ENOMEM if the request cannot
 * be allocated).  frow is not used.
 */
int rf_InstallSpareTable(
  RF_Raid_t    *raidPtr,
  RF_RowCol_t   frow,
  RF_RowCol_t   fcol)
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_SparetWait_t *req;
  int retcode;

  RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
  if (req == NULL) {
    /* do not fill in a request we failed to allocate */
    return(ENOMEM);
  }
  req->C                             = raidPtr->numCol;
  req->G                             = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
  req->fcol                          = fcol;
  req->SUsPerPU                      = raidPtr->Layout.SUsPerPU;
  req->TablesPerSpareRegion          = info->TablesPerSpareRegion;
  req->BlocksPerTable                = info->BlocksPerTable;
  req->TableDepthInPUs               = info->TableDepthInPUs;
  req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;

#ifndef KERNEL
  info->SpareTable = rf_ReadSpareTable(req, info->sparemap_fname);
  RF_Free(req, sizeof(*req));
  retcode = (info->SpareTable) ? 0 : 1;
#else /* KERNEL */
  retcode = rf_GetSpareTableFromDaemon(req);
  RF_ASSERT(!retcode);  /* XXX -- fix this to recover gracefully -- XXX */
#endif /* !KERNEL */

  return(retcode);
}
#ifdef KERNEL
/*
* Invoked via ioctl to install a spare table in the kernel.
*/
int rf_SetSpareTable(raidPtr, data)
RF_Raid_t *raidPtr;
void *data;
{
RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
RF_SpareTableEntry_t **ptrs;
int i, retcode;
/* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */
RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
if (retcode) return(retcode);
/* now allocate kernel space for the row pointers */
RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
/* now allocate kernel space for each row in the table, and copy it in from user space */
for (i=0; i<info->TablesPerSpareRegion; i++) {
RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
if (retcode) {
info->SpareTable = NULL; /* blow off the memory we've allocated */
return(retcode);
}
}
/* free up the temporary array we used */
RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
return(0);
}
#endif /* KERNEL */
RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr)
RF_Raid_t *raidPtr;
{
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk );
}
/*
 * Release the installed spare table: each of the TablesPerSpareRegion rows,
 * then the array of row pointers, and finally clear info->SpareTable.
 * The table pointer is dereferenced unconditionally, so callers must make
 * sure a table is installed.
 */
void rf_FreeSpareTable(raidPtr)
  RF_Raid_t  *raidPtr;
{
  RF_DeclusteredConfigInfo_t *dc = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_SpareTableEntry_t **rows = dc->SpareTable;
  long n = 0;

  while (n < dc->TablesPerSpareRegion) {
    RF_Free(rows[n], dc->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
    n++;
  }
  RF_Free(rows, dc->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  dc->SpareTable = (RF_SpareTableEntry_t **) NULL;
}

View File

@ -0,0 +1,181 @@
/* $NetBSD: rf_decluster.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*----------------------------------------------------------------------
*
* decluster.h -- header file for declustered layout code
*
* Adapted from raidSim version July 1994
* Created 10-21-92 (MCH)
*
*--------------------------------------------------------------------*/
/*
* :
* Log: rf_decluster.h,v
* Revision 1.20 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.19 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.18 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.17 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.16 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.15 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.14 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.13 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.12 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.11 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.10 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.9 1995/12/01 15:58:23 root
* added copyright info
*
* Revision 1.8 1995/11/17 18:57:02 wvcii
* added prototyping to MapParity
*
* Revision 1.7 1995/07/02 15:08:31 holland
* bug fixes related to getting distributed sparing numbers
*
* Revision 1.6 1995/06/23 13:41:18 robby
* updeated to prototypes in rf_layout.h
*
*/
#ifndef _RF__RF_DECLUSTER_H_
#define _RF__RF_DECLUSTER_H_
#include "rf_types.h"
/*
* These structures define the tables used to locate the spare unit
* associated with a particular data or parity unit, and to perform
* the associated inverse mapping.
*/
/*
 * One spare table entry.  The table is indexed as
 * [table within spare region][block]; rf_remap_to_spare_space() uses
 * spareDisk as the output column and adds spareBlockOffsetInSUs to the
 * start of the spare space to get the disk stripe unit.
 */
struct RF_SpareTableEntry_s {
u_int spareDisk; /* disk to which this block is spared */
u_int spareBlockOffsetInSUs; /* offset into spare table for that disk */
};
#define RF_SPAREMAP_NAME_LEN 128
/* this is the layout-specific info structure for the declustered layout.
 * It is allocated and filled in by rf_ConfigureDeclustered() and reached
 * through the layout's layoutSpecificInfo pointer.  Abbreviations used in the
 * field names: SU = stripe unit, PU = parity unit, ft = fulltable.
*/
struct RF_DeclusteredConfigInfo_s {
RF_StripeCount_t groupSize; /* no. of stripe units per parity stripe */
RF_RowCol_t **LayoutTable; /* the block design table */
RF_RowCol_t **OffsetTable; /* the sector offset table */
RF_RowCol_t **BlockTable; /* the block membership table */
RF_StripeCount_t SUsPerFullTable; /* stripe units per full table */
RF_StripeCount_t SUsPerTable; /* stripe units per table */
RF_StripeCount_t PUsPerBlock; /* parity units per block */
RF_StripeCount_t SUsPerBlock; /* stripe units per block */
RF_StripeCount_t BlocksPerTable; /* block design tuples per table */
RF_StripeCount_t NumParityReps; /* tables per full table */
RF_StripeCount_t TableDepthInPUs; /* PUs on one disk in 1 table */
RF_StripeCount_t FullTableDepthInPUs; /* PUs on one disk in 1 fulltable */
RF_StripeCount_t FullTableLimitSUID; /* SU where partial fulltables start */
RF_StripeCount_t ExtraTablesPerDisk; /* # of tables in last fulltable */
RF_SectorNum_t DiskOffsetOfLastFullTableInSUs; /* disk offs of partial ft, if any */
RF_StripeCount_t numCompleteFullTablesPerDisk; /* ft identifier of partial ft, if any */
u_int Lambda; /* the pair count in the block design */
/* these are used only in the distributed-sparing case */
RF_StripeCount_t FullTablesPerSpareRegion; /* # of ft's comprising 1 spare region */
RF_StripeCount_t TablesPerSpareRegion; /* # of tables */
RF_SectorCount_t SpareSpaceDepthPerRegionInSUs; /* spare space/disk/region */
RF_SectorCount_t SpareRegionDepthInSUs; /* # of units/disk/region */
RF_SectorNum_t DiskOffsetOfLastSpareSpaceChunkInSUs; /* locates sp space after partial ft */
RF_StripeCount_t TotSparePUsPerDisk; /* total number of spare PUs per disk */
RF_StripeCount_t NumCompleteSRs; /* # of complete spare regions per disk */
RF_SpareTableEntry_t **SpareTable; /* remap table for spare space */
char sparemap_fname[RF_SPAREMAP_NAME_LEN]; /* where to find sparemap. not used in kernel */
};
/* configuration entry points; both return 0 on success and a nonzero error
 * code otherwise.  The DS variant additionally registers a shutdown event
 * that frees the spare table. */
int rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
int rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/* address mapping: translate a RAID sector address into row, column and disk
 * sector of the data unit (MapSector) or of its parity unit (MapParity);
 * a nonzero remap redirects the access to distributed spare space */
void rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
void rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* *diskids is set to point into the block design table; callers must not
 * modify the array it points to */
void rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
void rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
RF_ReconUnitNum_t *which_ru);
/* spare table management */
int rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
void rf_FreeSpareTable(RF_Raid_t *raidPtr);
/* reconstruction defaults */
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr);
int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr);
/* helpers shared by the mapping routines */
void rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t *SUID, RF_StripeCount_t *sus_per_fulltable,
RF_StripeCount_t *fulltable_depth, RF_StripeNum_t *base_suid);
void rf_remap_to_spare_space(
RF_RaidLayout_t *layoutPtr,
RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row, RF_StripeNum_t FullTableID,
RF_StripeNum_t TableID, RF_SectorNum_t BlockID, RF_StripeNum_t base_suid,
RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol, RF_StripeNum_t *outSU);
/* defined only in KERNEL builds of rf_decluster.c */
int rf_SetSpareTable(RF_Raid_t *raidPtr, void *data);
RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr);
#endif /* !_RF__RF_DECLUSTER_H_ */

View File

@ -0,0 +1,588 @@
/* $NetBSD: rf_declusterPQ.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Daniel Stodolsky, Mark Holland, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*--------------------------------------------------
* rf_declusterPQ.c
*
* mapping code for declustered P & Q or declustered EvenOdd
* much code borrowed from rf_decluster.c
*
*--------------------------------------------------*/
/*
* $Header: /cvsroot/src/sys/dev/raidframe/rf_declusterPQ.c,v 1.1 1998/11/13 04:20:28 oster Exp $
*
* Log: rf_declusterPQ.c,v
* Revision 1.34 1996/08/21 19:47:14 jimz
* fix bogus return values from config
*
* Revision 1.33 1996/08/21 15:09:16 jimz
* cleanup debugging spoo
*
* Revision 1.32 1996/08/21 04:13:36 jimz
* debug with EvenOdd
*
* Revision 1.31 1996/08/20 22:41:54 jimz
* 2 parity disks, not 1
*
* Revision 1.30 1996/07/31 16:56:18 jimz
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
*
* Revision 1.29 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.28 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.27 1996/06/11 08:45:12 jimz
* improved error-checking on array configuration
*
* Revision 1.26 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.25 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.24 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.23 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.22 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.21 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.20 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.19 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.18 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.17 1996/05/17 00:52:56 jimz
* RepIndex was not being initialized before the computation of
* RepIndexQ in MapQDeclusteredPQ(). I copied the initialization
* from MapParityDeclusteredPQ(). Hope that was right.
*
* Revision 1.16 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.15 1995/12/01 15:57:46 root
* added copyright info
*
* Revision 1.14 1995/11/17 19:00:13 wvcii
* added prototyping to MapParity
* created MapQ
*
* Revision 1.13 1995/10/05 22:20:48 jimz
* free_1d_array() takes two args; provide them both
*
* Revision 1.12 1995/09/06 19:26:33 wvcii
* offset cfgBuf by sparemap length (ConfigureDeclusteredPQ)
*
* Revision 1.11 95/06/23 13:41:11 robby
* updated to prototypes in rf_layout.h
*
* Revision 1.10 1995/05/02 22:46:53 holland
* minor code cleanups.
*
* Revision 1.9 1995/03/15 20:45:23 holland
* distr sparing changes.
*
* Revision 1.8 1995/03/01 20:25:48 holland
* kernelization changes
*
* Revision 1.7 1995/02/17 19:39:56 holland
* added size param to all calls to Free().
* this is ignored at user level, but necessary in the kernel.
*
* Revision 1.6 1995/02/10 17:34:10 holland
* kernelization changes
*
* Revision 1.5 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.4 1995/02/01 15:13:05 holland
* moved #include of general.h out of raid.h and into each file
*
* Revision 1.3 1995/02/01 14:25:19 holland
* began changes for kernelization:
* changed all instances of mutex_t and cond_t to DECLARE macros
* converted configuration code to use config structure
*
* Revision 1.2 1994/11/28 22:13:56 danner
* corrected some mapping bugs.
*
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_configure.h"
#include "rf_decluster.h"
#include "rf_declusterPQ.h"
#include "rf_debugMem.h"
#include "rf_utils.h"
#include "rf_alloclist.h"
#include "rf_general.h"
/* configuration code */
/*
 * rf_ConfigureDeclusteredPQ -- configure a declustered P+Q layout.
 *
 * Parses the block design found in cfgPtr->layoutSpecific.  After the
 * RF_SPAREMAP_NAME_LEN-byte sparemap name the buffer holds the ints
 * b, v, k, r, lambda and noRotate, followed by b*k one-byte layout table
 * entries.  The routine allocates an RF_DeclusteredConfigInfo_t (hung off
 * raidPtr->Layout.layoutSpecificInfo), derives the table/fulltable/spare
 * region geometry, trims the usable disk size to a whole number of tables,
 * builds the Layout, Offset and Block lookup tables and finally fills in
 * the array-wide size parameters.
 *
 * listp is not used by this routine.
 *
 * Returns 0 on success, EINVAL if the block design is unusable (k < 3,
 * b < 1, v < k, v != number of columns, table depth of zero, design too big
 * for the disks, or a layout table that references a nonexistent column or
 * uses one column more often than the table depth allows), and ENOMEM if
 * an allocation fails.
 */
int rf_ConfigureDeclusteredPQ(
    RF_ShutdownList_t **listp,
    RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    int b, v, k, r, lambda;     /* block design params */
    int i, j, l;
    int *first_avail_slot;
    int complete_FT_count, SUID;
    RF_DeclusteredConfigInfo_t *info;
    int numCompleteFullTablesPerDisk;
    int PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk = 0, extraPUsPerDisk;
    int totSparePUsPerDisk;
    int diskOffsetOfLastFullTableInSUs, SpareSpaceInSUs;
    char *cfgBuf = (char *) (cfgPtr->layoutSpecific);

    /* skip the sparemap file name, then pull out the block design parameters */
    cfgBuf += RF_SPAREMAP_NAME_LEN;
    b = *( (int *) cfgBuf); cfgBuf += sizeof(int);
    v = *( (int *) cfgBuf); cfgBuf += sizeof(int);
    k = *( (int *) cfgBuf); cfgBuf += sizeof(int);
    r = *( (int *) cfgBuf); cfgBuf += sizeof(int);
    /* NOTE(review): lambda is parsed but never stored in info here -- confirm that is intended */
    lambda = *( (int *) cfgBuf); cfgBuf += sizeof(int);
    raidPtr->noRotate = *( (int *) cfgBuf); cfgBuf += sizeof(int);

    /* a stripe needs P, Q and at least one data unit, so k must be at least 3 */
    if (k <= 2) {
        printf("RAIDFRAME: k=%d, minimum value 3\n", k);
        return(EINVAL);
    }
    /*
     * A block holds k distinct columns out of v, and there must be at least
     * one block.  Rejecting anything else here also guarantees v-1 > 0 for
     * the divisions below.
     */
    if (b < 1 || v < k) {
        RF_ERRORMSG2("RAID: config error: invalid block design (b=%d, v=%d)\n", b, v);
        return(EINVAL);
    }

    /* 1. create layout specific structure */
    RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
    if (info == NULL)
        return(ENOMEM);
    layoutPtr->layoutSpecificInfo = (void *) info;

    /* the sparemaps are generated assuming that parity is rotated, so we issue
     * a warning if both distributed sparing and no-rotate are on at the same time
     */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
        RF_ERRORMSG("Warning: distributed sparing specified without parity rotation.\n");
    }

    if (raidPtr->numCol != v) {
        RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
        return(EINVAL);
    }

    /* 3. set up the values used in devRaidMap */
    info->BlocksPerTable = b;
    info->NumParityReps = info->groupSize = k;
    info->PUsPerBlock = k-2;    /* PQ */
    info->SUsPerTable = b * info->PUsPerBlock * layoutPtr->SUsPerPU;   /* b blks, k-2 data PUs each */
    info->SUsPerFullTable = k * info->SUsPerTable;     /* rot k times */
    info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
    info->TableDepthInPUs = (b*k) / v;
    info->FullTableDepthInPUs = info->TableDepthInPUs * k;     /* k repetitions */

    /* a table depth of zero would make every later size computation divide by zero */
    if (info->TableDepthInPUs < 1) {
        RF_ERRORMSG2("RAID: config error: block design too small (b*k=%d, v=%d)\n", b*k, v);
        return(EINVAL);
    }

    /* used only in distributed sparing case */
    info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1);   /* (v-1)/gcd fulltables */
    info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
    info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;

    /* check to make sure the block design is sufficiently small */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
            /* report all three quantities in stripe units, matching the comparison */
            RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
                (int)(info->FullTableDepthInPUs * layoutPtr->SUsPerPU),
                (int)info->SpareSpaceDepthPerRegionInSUs,
                (int)layoutPtr->stripeUnitsPerDisk);
            return(EINVAL);
        }
    } else {
        if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
            RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
                (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU),
                (int)layoutPtr->stripeUnitsPerDisk);
            return(EINVAL);
        }
    }

    /* compute the size of each disk, and the number of tables in the last fulltable (which
     * need not be complete)
     */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
        spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
            (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
        info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;

        numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
        info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
        extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;

        /* assume conservatively that we need the full amount of spare space in one region in order
         * to provide spares for the partial spare region at the end of the array. We set "i" to
         * the number of tables in the partial spare region. This may actually include some fulltables.
         */
        extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
        if (extraPUsPerDisk <= 0) i = 0;
        else i = extraPUsPerDisk/info->TableDepthInPUs;

        complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
        info->ExtraTablesPerDisk = i % k;

        /* note that in the last spare region, the spare space is complete even though data/parity space is not */
        totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
        info->TotSparePUsPerDisk = totSparePUsPerDisk;

        layoutPtr->stripeUnitsPerDisk =
            ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs +     /* data & parity space */
            info->ExtraTablesPerDisk * info->TableDepthInPUs +
            totSparePUsPerDisk     /* spare space */
            ) * layoutPtr->SUsPerPU;
        /* NOTE(review): (k-1)/k is the single-parity data fraction; with P and Q
         * one would expect (k-2)/k -- confirm before relying on this value */
        layoutPtr->dataStripeUnitsPerDisk =
            (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
            * layoutPtr->SUsPerPU * (k-1) / k;
    } else {
        /* non-dist spare case: force each disk to contain an integral number of tables */
        layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
        layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);

        /* compute the number of tables in the last fulltable, which need not be complete */
        complete_FT_count =
            ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;

        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
        info->ExtraTablesPerDisk =
            ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
    }

    raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;

    /* find the disk offset of the stripe unit where the last fulltable starts */
    numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
    diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
        diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
        info->DiskOffsetOfLastSpareSpaceChunkInSUs =
            diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
    }
    info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
    info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;

    /* 4. create and initialize the lookup tables */
    info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    if (info->LayoutTable == NULL)
        return(ENOMEM);
    info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    if (info->OffsetTable == NULL)
        return(ENOMEM);
    info->BlockTable = rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
    if (info->BlockTable == NULL)
        return(ENOMEM);

    /* scratch array, not on the cleanup list: must be freed on every path below */
    first_avail_slot = (int *) rf_make_1d_array(v, NULL);
    if (first_avail_slot == NULL)
        return(ENOMEM);

    /* copy the layout table, refusing entries that do not name one of the v columns
     * (they are used below as indices into first_avail_slot[] and BlockTable[][])
     */
    for (i=0; i<b; i++) {
        for (j=0; j<k; j++) {
            int disk = *cfgBuf++;

            if (disk < 0 || disk >= v) {
                RF_ERRORMSG2("RAID: config error: layout table entry (%d) not in range 0..%d\n", disk, v-1);
                rf_free_1d_array(first_avail_slot, v);
                return(EINVAL);
            }
            info->LayoutTable[i][j] = disk;
        }
    }

    /* initialize offset table: each unit gets the next free slot on its column.
     * BlockTable has only TableDepthInPUs slots per column, so a column that shows
     * up more often than that means the design is not balanced.
     */
    for (i=0; i<b; i++) {
        for (j=0; j<k; j++) {
            int disk = info->LayoutTable[i][j];

            if (first_avail_slot[disk] >= info->TableDepthInPUs) {
                RF_ERRORMSG2("RAID: config error: column %d used more than %d times in block design\n",
                    disk, (int)info->TableDepthInPUs);
                rf_free_1d_array(first_avail_slot, v);
                return(EINVAL);
            }
            info->OffsetTable[i][j] = first_avail_slot[disk];
            first_avail_slot[disk]++;
        }
    }

    /* initialize block table */
    for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
        for (i=0; i<b; i++) {
            for (j=0; j<k; j++) {
                info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
                    [ info->LayoutTable[i][j] ] = SUID;
            }
            SUID++;
        }
    }

    rf_free_1d_array(first_avail_slot, v);

    /* 5. set up the remaining redundant-but-useful parameters */
    raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
        info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
    layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-2);

    /* strange evaluation order below to try and minimize overflow problems */
    layoutPtr->dataSectorsPerStripe = (k-2) * layoutPtr->sectorsPerStripeUnit;
    layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
    layoutPtr->numDataCol = k-2;
    layoutPtr->numParityCol = 2;

    return(0);
}
/*
 * Default number of floating reconstruction buffers for the P+Q layout:
 * whichever is larger of three buffers per column and the default used by
 * the plain declustered layout.
 */
int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr)
{
    int declustered_default = rf_GetDefaultNumFloatingReconBuffersDeclustered(raidPtr);

    return(RF_MAX(3 * raidPtr->numCol, declustered_default));
}
/*
 * rf_MapSectorDeclusteredPQ -- map a RAID address to the location of the
 * data unit that holds it.
 *
 *   raidPtr     array descriptor; its layout info must have been set up
 *               by rf_ConfigureDeclusteredPQ
 *   raidSector  RAID address (in sectors) to map
 *   row, col    out: row and column of the disk holding the data
 *   diskSector  out: sector offset on that disk
 *   remap       if nonzero, redirect the unit to distributed spare space
 *
 * The address is broken down into fulltable / table / block / offset
 * within block; the block design's LayoutTable then gives the column and
 * the OffsetTable gives the depth of the parity unit on that column.
 */
void rf_MapSectorDeclusteredPQ(
    RF_Raid_t *raidPtr,
    RF_RaidAddr_t raidSector,
    RF_RowCol_t *row,
    RF_RowCol_t *col,
    RF_SectorNum_t *diskSector,
    int remap)
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
    RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
    RF_StripeNum_t BlockID, BlockOffset, RepIndex;
    RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
    RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;

    rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

    FullTableID = SUID / sus_per_fulltable;    /* fulltable ID within array (across rows) */
    *row = FullTableID % raidPtr->numRow;
    FullTableID /= raidPtr->numRow;            /* convert to fulltable ID on this disk */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
        SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
    }
    FullTableOffset = SUID % sus_per_fulltable;
    TableID = FullTableOffset / info->SUsPerTable;
    TableOffset = FullTableOffset - TableID * info->SUsPerTable;
    BlockID = TableOffset / info->PUsPerBlock;
    BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
    BlockID %= info->BlocksPerTable;
    RF_ASSERT(BlockOffset < info->groupSize-2 );
    /*
       TableIDs go from 0 .. GroupSize-1 inclusive.
       PUsPerBlock is k-2.
       We want the tableIDs to rotate from the
       right, so use GroupSize
    */
    RepIndex = info->groupSize - 1 - TableID;
    RF_ASSERT(RepIndex >= 0);
    if (!raidPtr->noRotate) {
        if (TableID==0)
            BlockOffset++;     /* P on last drive, Q on first */
        else
            BlockOffset += ((BlockOffset >= RepIndex) ? 2 : 0);    /* skip over PQ */
    }
    /*
     * The column must be looked up whether or not parity rotates: without
     * rotation P and Q sit in the last two positions of the block, so the
     * data position is simply the unadjusted BlockOffset.  (Doing this only
     * in the rotating case left *col unset for noRotate arrays.)
     */
    RF_ASSERT(BlockOffset < info->groupSize);
    *col = info->LayoutTable[BlockID][BlockOffset];

    /* remap to distributed spare space if indicated */
    if (remap) {
        rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
    } else {
        outSU = base_suid;
        outSU += FullTableID * fulltable_depth;                                    /* offs to strt of FT */
        outSU += SpareSpace;                                                       /* skip rsvd spare space */
        outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;            /* offs to strt of tble */
        outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;    /* offs to the PU */
    }
    outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);            /* offs to the SU within a PU */

    /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector */
    *diskSector = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
}
/*
 * rf_MapParityDeclusteredPQ -- locate the P (parity) unit of the stripe
 * that contains raidSector.
 *
 * Writes the disk's row and column and the sector on that disk through
 * row, col and diskSector.  Remapping is not supported for parity: a
 * nonzero remap ends in RF_PANIC().
 */
void rf_MapParityDeclusteredPQ(
    RF_Raid_t *raidPtr,
    RF_RaidAddr_t raidSector,
    RF_RowCol_t *row,
    RF_RowCol_t *col,
    RF_SectorNum_t *diskSector,
    int remap)
{
    RF_RaidLayout_t *lay = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *dcl = (RF_DeclusteredConfigInfo_t *) lay->layoutSpecificInfo;
    RF_StripeNum_t suid = raidSector / lay->sectorsPerStripeUnit;
    RF_StripeCount_t susPerFT = dcl->SUsPerFullTable;
    RF_StripeCount_t ftDepthSUs = dcl->FullTableDepthInPUs * lay->SUsPerPU;
    RF_StripeNum_t firstSU = 0;
    RF_StripeNum_t spareSkip = 0;
    RF_StripeNum_t ftID, ftOff, tblID, tblOff, blkID, pSlot, su;

    rf_decluster_adjust_params(lay, &suid, &susPerFT, &ftDepthSUs, &firstSU);

    /* fulltable -> row, then fulltable index on the disks of that row */
    ftID = suid / susPerFT;
    *row = ftID % raidPtr->numRow;
    ftID /= raidPtr->numRow;

    /* with distributed sparing, step over the spare space of earlier regions */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        RF_StripeNum_t region = ftID / dcl->FullTablesPerSpareRegion;

        spareSkip = region * dcl->SpareSpaceDepthPerRegionInSUs;
    }

    /* table within the fulltable, block within the table */
    ftOff = suid % susPerFT;
    tblID = ftOff / dcl->SUsPerTable;
    tblOff = ftOff - tblID * dcl->SUsPerTable;
    blkID = tblOff / dcl->PUsPerBlock;
    blkID %= dcl->BlocksPerTable;

    /* position of P inside the block: fixed when not rotating, else it moves with the table */
    if (raidPtr->noRotate)
        pSlot = dcl->PUsPerBlock;
    else
        pSlot = dcl->groupSize - 1 - tblID;
    *col = dcl->LayoutTable[blkID][pSlot];

    if (remap)
        RF_PANIC();

    /* accumulate the stripe unit offset on the disk, outermost structure first */
    su = firstSU;
    su += ftID * ftDepthSUs;                                   /* start of the fulltable */
    su += spareSkip;                                           /* reserved spare space */
    su += tblID * dcl->TableDepthInPUs * lay->SUsPerPU;        /* start of the table */
    su += dcl->OffsetTable[blkID][pSlot] * lay->SUsPerPU;      /* the parity unit */
    su += tblOff / (dcl->BlocksPerTable * dcl->PUsPerBlock);   /* SU within the PU */

    *diskSector = su*lay->sectorsPerStripeUnit + (raidSector % lay->sectorsPerStripeUnit);
}
/*
 * rf_MapQDeclusteredPQ -- locate the Q unit of the stripe that contains
 * raidSector.
 *
 * Q occupies the block position right after P, wrapping to position 0 when
 * P is in the last position.  Writes the disk's row and column and the
 * sector on that disk through row, col and diskSector.  Remapping is not
 * supported: a nonzero remap ends in RF_PANIC().
 */
void rf_MapQDeclusteredPQ(
    RF_Raid_t *raidPtr,
    RF_RaidAddr_t raidSector,
    RF_RowCol_t *row,
    RF_RowCol_t *col,
    RF_SectorNum_t *diskSector,
    int remap)
{
    RF_RaidLayout_t *lay = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *dcl = (RF_DeclusteredConfigInfo_t *) lay->layoutSpecificInfo;
    RF_StripeNum_t suid = raidSector / lay->sectorsPerStripeUnit;
    RF_StripeCount_t susPerFT = dcl->SUsPerFullTable;
    RF_StripeCount_t ftDepthSUs = dcl->FullTableDepthInPUs * lay->SUsPerPU;
    RF_StripeNum_t firstSU = 0;
    RF_StripeNum_t spareSkip = 0;
    RF_StripeNum_t ftID, ftOff, tblID, tblOff, blkID, pSlot, qSlot, su;

    rf_decluster_adjust_params(lay, &suid, &susPerFT, &ftDepthSUs, &firstSU);

    /* fulltable -> row, then fulltable index on the disks of that row */
    ftID = suid / susPerFT;
    *row = ftID % raidPtr->numRow;
    ftID /= raidPtr->numRow;

    /* with distributed sparing, step over the spare space of earlier regions */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        RF_StripeNum_t region = ftID / dcl->FullTablesPerSpareRegion;

        spareSkip = region * dcl->SpareSpaceDepthPerRegionInSUs;
    }

    /* table within the fulltable, block within the table */
    ftOff = suid % susPerFT;
    tblID = ftOff / dcl->SUsPerTable;
    tblOff = ftOff - tblID * dcl->SUsPerTable;
    blkID = tblOff / dcl->PUsPerBlock;
    blkID %= dcl->BlocksPerTable;

    /* find P's position first; Q is the next position, wrapping around the block */
    if (raidPtr->noRotate)
        pSlot = dcl->PUsPerBlock;
    else
        pSlot = dcl->groupSize - 1 - tblID;
    if (pSlot == (dcl->groupSize-1))
        qSlot = 0;
    else
        qSlot = pSlot+1;
    *col = dcl->LayoutTable[blkID][qSlot];

    if (remap)
        RF_PANIC();

    /* accumulate the stripe unit offset on the disk, using qSlot for the unit lookup */
    su = firstSU;
    su += ftID * ftDepthSUs;                                   /* start of the fulltable */
    su += spareSkip;                                           /* reserved spare space */
    su += tblID * dcl->TableDepthInPUs * lay->SUsPerPU;        /* start of the table */
    su += tblOff / (dcl->BlocksPerTable * dcl->PUsPerBlock);   /* SU within the PU */
    su += dcl->OffsetTable[blkID][qSlot] * lay->SUsPerPU;      /* the Q unit */

    *diskSector = su*lay->sectorsPerStripeUnit + (raidSector % lay->sectorsPerStripeUnit);
}
/*
 * rf_IdentifyStripeDeclusteredPQ -- report which disks make up the stripe
 * containing addr.
 *
 * *diskids is pointed at a row of the layout's own LayoutTable, so the
 * caller must treat the array as read-only.  *outRow receives the row of
 * the array the stripe lives in.
 */
void rf_IdentifyStripeDeclusteredPQ(
    RF_Raid_t *raidPtr,
    RF_RaidAddr_t addr,
    RF_RowCol_t **diskids,
    RF_RowCol_t *outRow)
{
    RF_RaidLayout_t *lay = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *dcl = (RF_DeclusteredConfigInfo_t *) lay->layoutSpecificInfo;
    RF_StripeCount_t susPerFT = dcl->SUsPerFullTable;
    RF_StripeCount_t ftDepthSUs = dcl->FullTableDepthInPUs * lay->SUsPerPU;
    RF_StripeNum_t firstSU = 0;
    RF_StripeNum_t suid = rf_RaidAddressToStripeUnitID(lay, addr);
    RF_StripeNum_t sid, ftID;
    int designBlock;

    rf_decluster_adjust_params(lay, &suid, &susPerFT, &ftDepthSUs, &firstSU);

    /* the fulltable index, counted across rows, selects the row */
    ftID = suid / susPerFT;
    *outRow = ftID % raidPtr->numRow;

    /* the stripe's position in the block design selects the set of disks */
    sid = rf_StripeUnitIDToStripeID(lay, suid);
    designBlock = (sid % dcl->BlocksPerTable);
    *diskids = dcl->LayoutTable[designBlock];
}

View File

@ -0,0 +1,99 @@
/* $NetBSD: rf_declusterPQ.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky, Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* :
* Log: rf_declusterPQ.h,v
* Revision 1.13 1996/08/20 22:42:08 jimz
* missing prototype of IdentifyStripeDeclusteredPQ added
*
* Revision 1.12 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.11 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.10 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.9 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.8 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.7 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.6 1995/12/01 15:59:20 root
* added copyright info
*
* Revision 1.5 1995/11/17 19:08:23 wvcii
* added prototyping to MapParity
*
* Revision 1.4 1995/11/07 15:30:33 wvcii
* changed PQDagSelect prototype
* function no longer generates numHdrSucc, numTermAnt
* removed ParityLoggingDagSelect prototype
*
* Revision 1.3 1995/06/23 13:40:57 robby
* updated to prototypes in rf_layout.h
*
* Revision 1.2 1995/05/02 22:46:53 holland
* minor code cleanups.
*
* Revision 1.1 1994/11/19 20:26:57 danner
* Initial revision
*
*/
/*
 * Interface to the declustered P+Q layout (rf_declusterPQ.c): the
 * configuration routine plus the address mapping functions.
 */
#ifndef _RF__RF_DECLUSTERPQ_H_
#define _RF__RF_DECLUSTERPQ_H_
#include "rf_types.h"
/*
 * Parse the block design in cfgPtr and set up the layout-specific info and
 * lookup tables for raidPtr.  Returns 0, EINVAL or ENOMEM.
 */
int rf_ConfigureDeclusteredPQ(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/* Larger of 3 * numCol and the plain declustered layout's default. */
int rf_GetDefaultNumFloatingReconBuffersPQ(RF_Raid_t *raidPtr);
/*
 * Map raidSector to the row, column and disk sector of its data unit;
 * a nonzero remap redirects the unit to distributed spare space.
 */
void rf_MapSectorDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/*
 * Map raidSector to the row, column and disk sector of its stripe's P unit.
 * A nonzero remap is not supported and panics.
 */
void rf_MapParityDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/*
 * Map raidSector to the row, column and disk sector of its stripe's Q unit.
 * A nonzero remap is not supported and panics.
 */
void rf_MapQDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/*
 * Point *diskids at the layout table row listing the disks of the stripe
 * containing addr (read-only for the caller) and store its row in *outRow.
 */
void rf_IdentifyStripeDeclusteredPQ(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
#endif /* !_RF__RF_DECLUSTERPQ_H_ */

505
sys/dev/raidframe/rf_demo.c Normal file
View File

@ -0,0 +1,505 @@
/* $NetBSD: rf_demo.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/**********************************************************************************
*
* rf_demo.c -- code for supporting demos. this is not actually part of the driver.
*
**********************************************************************************/
/* :
* Log: rf_demo.c,v
* Revision 1.24 1996/06/17 14:38:33 jimz
* properly #if out RF_DEMO code
* fix bug in MakeConfig that was causing weird behavior
* in configuration routines (config was not zeroed at start)
* clean up genplot handling of stacks
*
* Revision 1.23 1996/06/17 03:23:09 jimz
* explicitly do pthread stuff (for join)
* NOTE: this should be changed!
*
* Revision 1.22 1996/06/14 23:15:38 jimz
* attempt to deal with thread GC problem
*
* Revision 1.21 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.20 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.19 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.18 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.17 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.16 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.15 1996/05/20 16:14:08 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.14 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.13 1995/12/01 15:56:07 root
* added copyright info
*
*/
#include "rf_archs.h"
#if RF_DEMO > 0
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <strings.h>
#include <unistd.h>
#include <sys/time.h>
#include <signal.h>
#include "rf_threadstuff.h"
#include "rf_demo.h"
#include "rf_utils.h"
#include "rf_general.h"
#include "rf_options.h"
#ifdef SIMULATE
#include "rf_diskevent.h"
#endif /* SIMULATE */
/*
 * When nonzero, rf_startup_iops_demo also creates the "Avg User IOs/s" and
 * "Avg Disk IOs/s" meters.
 */
static int doMax = 0; /* currently no way to set this */
/****************************************************************************************
* fault-free demo code
***************************************************************************************/
/*
 * Meter ids as returned by rf_CreateMeter.
 * NOTE(review): -1 presumably means "meter not created" -- confirm.
 */
static int user_iops_meter = -1;
static int disk_iops_meter = -1;
static int max_user_meter = -1;
static int max_disk_meter = -1;
static int recon_pctg_meter = -1;
static int avg_resp_time_meter = -1;
static int recon_time_meter = -1;
static int ff_avg_resp_time_meter = -1;
static int deg_avg_resp_time_meter = -1;
static int recon_avg_resp_time_meter = -1;
/*
 * NOTE(review): presumably per-mode (fault-free / degraded / reconstruction)
 * I/O counts and response time sums -- inferred from the names, confirm.
 */
static int user_ios_ff=0;
static int user_ios_deg=0;
static int user_ios_recon=0;
static long user_resp_time_sum_ff = 0;
static long user_resp_time_sum_deg = 0;
static long user_resp_time_sum_recon = 0;
int rf_demo_op_mode = 0;
/* initialized with rf_mutex_init in rf_startup_iops_demo */
RF_DECLARE_STATIC_MUTEX(iops_mutex)
/* running totals; rf_startup_iops_demo resets these to zero */
static int user_ios_so_far, disk_ios_so_far, max_user, max_disk;
static long user_resp_time_sum_ms;
static int recon_pctg;
static struct timeval iops_starttime;
#ifndef SIMULATE
/* thread created in rf_startup_iops_demo to run rf_meter_update_thread */
static RF_Thread_t update_thread_desc;
#endif /* !SIMULATE */
/*
 * Cleared by rf_startup_iops_demo before the update thread is started.
 * NOTE(review): presumably the update thread's stop flag -- confirm.
 */
static int meter_update_terminate;
static int meter_update_interval = 2; /* seconds between meter updates */
static int iops_initialized = 0, recon_initialized = 0;
/* title prefixes for the meters, indexed by rf_demoMeterTag */
static char *demoMeterTags[] = {"FF", "Degr", "Recon"};
/* vertical slot of this demo's meters; set from meter_vpos in rf_startup_iops_demo */
static int vpos=0;
/* meter helpers private to this file */
static int rf_CreateMeter(char *title, char *geom, char *color);
static void rf_UpdateMeter(int meterid, int value);
static void rf_DestroyMeter(int meterid, int killproc);
void rf_startup_iops_demo(meter_vpos, C, G)
int meter_vpos;
int C;
int G;
{
char buf[100], title[100];
int rc;
vpos = meter_vpos;
sprintf(buf, "%dx%d-0+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
sprintf(title,"%s %d/%d User IOs/sec",demoMeterTags[rf_demoMeterTag],C,G);
user_iops_meter = rf_CreateMeter(title, buf, "black");
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING,vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
sprintf(title,"%s %d/%d Disk IOs/sec",demoMeterTags[rf_demoMeterTag],C,G);
disk_iops_meter = rf_CreateMeter(title, buf, "red");
if (doMax) {
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING),vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
sprintf(title,"%s %d/%d Avg User IOs/s",demoMeterTags[rf_demoMeterTag],C,G);
max_user_meter = rf_CreateMeter(title, buf, "black");
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 3*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
sprintf(title,"%s %d/%d Avg Disk IOs/s",demoMeterTags[rf_demoMeterTag],C,G);
max_disk_meter = rf_CreateMeter(title, buf, "red");
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 4*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
} else {
sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, 2*(RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
}
sprintf(title,"%s %d/%d Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G);
avg_resp_time_meter = rf_CreateMeter(title, buf, "blue");
rc = rf_mutex_init(&iops_mutex);
if (rc) {
RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
__LINE__, rc);
return;
}
user_ios_so_far = disk_ios_so_far = max_user = max_disk = 0;
user_resp_time_sum_ms = 0;
meter_update_terminate = 0;
#ifndef SIMULATE
pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL);
#endif /* !SIMULATE */
gettimeofday(&iops_starttime, NULL);
iops_initialized = 1;
}
/*
 * Record the completion of one user I/O.
 *
 * resptime -- response time of the access, added to the running sum
 *             (the meters label this quantity in ms)
 *
 * Does nothing unless one of the demos (iops or recon) has been started.
 */
void rf_update_user_stats(resptime)
  int resptime;
{
  if (iops_initialized || recon_initialized) {
    RF_LOCK_MUTEX(iops_mutex);
    user_resp_time_sum_ms += resptime;
    user_ios_so_far++;
    RF_UNLOCK_MUTEX(iops_mutex);
  }
}
/*
 * Add val to the count of disk I/Os issued so far.
 * Does nothing unless the iops demo has been started.
 */
void rf_update_disk_iops(val)
  int val;
{
  if (iops_initialized) {
    RF_LOCK_MUTEX(iops_mutex);
    disk_ios_so_far = disk_ios_so_far + val;
    RF_UNLOCK_MUTEX(iops_mutex);
  }
}
/*
 * Body of the meter update thread.
 *
 * Outside the simulator this loops until meter_update_terminate is set,
 * refreshing every meter that has been created (handle >= 0) and then
 * delaying via RF_DELAY_THREAD(1,0).  Under SIMULATE it performs a single
 * refresh pass using the simulated clock (rf_cur_time).
 *
 * Each pass folds the user I/O count and response-time sum gathered since
 * the previous pass into the totals for the current operating mode
 * (rf_demo_op_mode) and then clears them.
 *
 * NOTE(review): the counters are read and cleared here without taking
 * iops_mutex, although the rf_update_* routines modify them under it --
 * confirm whether that is acceptable for the demo.
 */
void rf_meter_update_thread()
{
  struct timeval now, diff;
  int iops, resptime;
  float secs;

#ifndef SIMULATE
  while (!meter_update_terminate) {
    gettimeofday(&now, NULL);
    RF_TIMEVAL_DIFF(&iops_starttime, &now, &diff);
    secs = ((float) diff.tv_sec) + ((float) diff.tv_usec)/1000000.0;
#else /* !SIMULATE */
    secs = rf_cur_time;
#endif /* !SIMULATE */

    if (user_iops_meter >= 0) {
      iops = (secs != 0.0) ? (int) (((float) user_ios_so_far) / secs) : 0;
      rf_UpdateMeter(user_iops_meter, iops);
      /* -1 means "no meter"; test the handle for validity, not for truth */
      if (max_user_meter >= 0 && iops > max_user) {
        max_user = iops;
        rf_UpdateMeter(max_user_meter, iops);
      }
    }
    if (disk_iops_meter >= 0) {
      iops = (secs != 0.0) ? (int) (((float) disk_ios_so_far) / secs) : 0;
      rf_UpdateMeter(disk_iops_meter, iops);
      if (max_disk_meter >= 0 && iops > max_disk) {
        max_disk = iops;
        rf_UpdateMeter(max_disk_meter, iops);
      }
    }
    if (recon_pctg_meter >= 0) {
      rf_UpdateMeter(recon_pctg_meter, recon_pctg);
    }

    /* average response time over the I/Os seen since the previous pass */
    resptime = (user_ios_so_far != 0) ? user_resp_time_sum_ms / user_ios_so_far : 0;

    switch (rf_demo_op_mode) {
    case RF_DEMO_FAULT_FREE:
      if (resptime && (ff_avg_resp_time_meter >= 0))
        rf_UpdateMeter(ff_avg_resp_time_meter, resptime);
      user_ios_ff += user_ios_so_far;
      user_resp_time_sum_ff += user_resp_time_sum_ms;
      break;
    case RF_DEMO_DEGRADED:
      if (resptime && (deg_avg_resp_time_meter >= 0))
        rf_UpdateMeter(deg_avg_resp_time_meter, resptime);
      user_ios_deg += user_ios_so_far;
      user_resp_time_sum_deg += user_resp_time_sum_ms;
      break;  /* must not fall into the recon case, or the sample is counted twice */
    case RF_DEMO_RECON:
      if (resptime && (recon_avg_resp_time_meter >= 0))
        rf_UpdateMeter(recon_avg_resp_time_meter, resptime);
      user_ios_recon += user_ios_so_far;
      user_resp_time_sum_recon += user_resp_time_sum_ms;
      break;
    default:
      printf("WARNING: demo meter update thread: Invalid op mode! \n");
      break;
    }
    user_ios_so_far = 0;
    user_resp_time_sum_ms = 0;
#ifndef SIMULATE
    RF_DELAY_THREAD(1,0);
  }
#endif /* !SIMULATE */
}
/*
 * Shut down the fault-free I/O-rate demo: stop (and, outside the simulator,
 * join) the meter update thread, destroy the meters created by
 * rf_startup_iops_demo, and destroy the statistics mutex.
 *
 * Does nothing if the iops demo is not currently initialized.
 */
void rf_finish_iops_demo()
{
  long status;

  if (!iops_initialized) return;
  iops_initialized = 0; /* make sure any subsequent update calls don't do anything */
  meter_update_terminate = 1;
#ifndef SIMULATE
  pthread_join(update_thread_desc, (pthread_addr_t)&status);
#endif /* !SIMULATE */

  /*
   * Only destroy meters that were actually created.  A handle of -1 (the
   * max_* meters when doMax is 0, or any meter whose creation failed) must
   * not be used to index the meter table.
   */
  if (user_iops_meter >= 0)     rf_DestroyMeter(user_iops_meter, (doMax) ? 1 : 0);
  if (disk_iops_meter >= 0)     rf_DestroyMeter(disk_iops_meter, (doMax) ? 1 : 0);
  if (max_user_meter >= 0)      rf_DestroyMeter(max_user_meter, 0);
  if (max_disk_meter >= 0)      rf_DestroyMeter(max_disk_meter, 0);
  if (avg_resp_time_meter >= 0) rf_DestroyMeter(avg_resp_time_meter, 0);
  rf_mutex_destroy(&iops_mutex);
}
/*
 * Switch the demo to a new operating mode (one of the RF_DEMO_* mode
 * constants) and record it in rf_demo_op_mode.
 *
 * Entering RF_DEMO_DEGRADED creates the degraded-mode response-time meter
 * and writes the final fault-free average to the fault-free meter.
 * Entering RF_DEMO_RECON creates the reconstruction response-time and
 * percent-complete meters and writes the final degraded-mode average to the
 * degraded meter.  Any other mode only updates rf_demo_op_mode.
 */
void rf_demo_update_mode(arg_mode)
  int arg_mode;
{
  int hpos;
  char buf[100], title[100];

  rf_demo_op_mode = arg_mode;
  switch (arg_mode) {
  case RF_DEMO_DEGRADED:
    /* freeze fault-free response time meter; create degraded mode meter */
    hpos = rf_demoMeterHpos+2;
    sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
    /* the title has no conversions, so no extra argument is passed */
    sprintf(title,"Degraded Mode Average Response Time (ms)");
    deg_avg_resp_time_meter = rf_CreateMeter(title, buf, "purple");
    /* the fault-free meter only exists if the recon demo was started with init set */
    if (ff_avg_resp_time_meter >= 0)
      rf_UpdateMeter(ff_avg_resp_time_meter, (user_ios_ff == 0)? 0: user_resp_time_sum_ff/user_ios_ff);
    break;
  case RF_DEMO_RECON:
    /* freeze degraded mode response time meter; create recon meters */
    hpos = rf_demoMeterHpos+1;
    sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, hpos * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
    sprintf(title,"Reconstruction Average Response Time (ms)");
    recon_avg_resp_time_meter = rf_CreateMeter(title, buf, "darkgreen");
    sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos) * (RF_DEMO_METER_WIDTH + RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
    sprintf(title,"Percent Complete / Recon Time");
    recon_pctg_meter = rf_CreateMeter(title,buf,"red");
    /* the degraded meter only exists if degraded mode was entered first */
    if (deg_avg_resp_time_meter >= 0)
      rf_UpdateMeter(deg_avg_resp_time_meter, (user_ios_deg == 0)? 0: user_resp_time_sum_deg/user_ios_deg);
    break;
  default: /*do nothing -- finish_recon_demo will update rest of meters */
    break;
  }
}
/****************************************************************************************
* reconstruction demo code
***************************************************************************************/
/*
 * Start the reconstruction demo.
 *
 * meter_vpos -- vertical slot (row of meters) in which to place the windows
 * C, G       -- values displayed in the meter title as "C/G"
 * init       -- when nonzero, also create the fault-free response-time meter
 *
 * Initializes the statistics mutex and, outside the simulator, starts the
 * meter update thread.  If the mutex cannot be initialized, the error is
 * reported and the demo is left uninitialized, matching
 * rf_startup_iops_demo; continuing would start a thread and enable locking
 * on an unusable mutex.
 */
void rf_startup_recon_demo(meter_vpos, C, G, init)
  int meter_vpos;
  int C;
  int G;
  int init;
{
  char buf[100], title[100];
  int rc;

  vpos = meter_vpos;
  if (init) {
    /* init demo -- display ff resp time meter */
    sprintf(buf, "%dx%d-%d+%d",RF_DEMO_METER_WIDTH, RF_DEMO_METER_HEIGHT, (rf_demoMeterHpos+3) * (RF_DEMO_METER_WIDTH+RF_DEMO_METER_SPACING), vpos * (RF_DEMO_METER_HEIGHT+RF_DEMO_METER_VSPACE));
    sprintf(title,"%s %d/%d Fault-Free Avg User Resp Time (ms)",demoMeterTags[rf_demoMeterTag],C,G);
    ff_avg_resp_time_meter = rf_CreateMeter(title, buf, "blue");
  }
  rc = rf_mutex_init(&iops_mutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return;
  }
  meter_update_terminate = 0;
#ifndef SIMULATE
  pthread_create(&update_thread_desc, raidframe_attr_default, (pthread_startroutine_t)rf_meter_update_thread, NULL);
#endif /* !SIMULATE */
  gettimeofday(&iops_starttime, NULL);
  recon_initialized = 1;
}
/*
 * Record the latest reconstruction progress value.  It is only stored here;
 * rf_meter_update_thread writes it to the recon meter on its next pass.
 */
void rf_update_recon_meter(val)
  int val;
{
  recon_pctg = val;
}
/*
 * Shut down the reconstruction demo.
 *
 * etime -- elapsed reconstruction time; its tv_sec field is shown on the
 *          recon meter in place of the percentage (a NULL pointer is
 *          displayed as 0)
 *
 * Writes the final reconstruction-mode average response time, stops (and,
 * outside the simulator, joins) the meter update thread, destroys the recon
 * demo's meters, and destroys the statistics mutex.  Does nothing if the
 * recon demo is not currently initialized, so that a thread that was never
 * started is not joined.
 */
void rf_finish_recon_demo(etime)
  struct timeval *etime;
{
  long status;
  int elapsed_secs;

  if (!recon_initialized) return;
  recon_initialized = 0; /* make sure any subsequent
                            update calls don't do anything */
  elapsed_secs = (etime != NULL) ? (int) etime->tv_sec : 0;
  recon_pctg = elapsed_secs; /* display recon time on meter */

  /* handles stay at -1 for meters that were never created; skip those */
  if (recon_avg_resp_time_meter >= 0)
    rf_UpdateMeter(recon_avg_resp_time_meter, (user_ios_recon == 0)? 0: user_resp_time_sum_recon/user_ios_recon);
  if (recon_pctg_meter >= 0)
    rf_UpdateMeter(recon_pctg_meter, elapsed_secs);

  meter_update_terminate = 1;
#ifndef SIMULATE
  pthread_join(update_thread_desc, (pthread_addr_t)&status); /* join the meter update thread */
#endif /* !SIMULATE */

  if (recon_pctg_meter >= 0)          rf_DestroyMeter(recon_pctg_meter, 0);
  if (ff_avg_resp_time_meter >= 0)    rf_DestroyMeter(ff_avg_resp_time_meter, 0);
  if (deg_avg_resp_time_meter >= 0)   rf_DestroyMeter(deg_avg_resp_time_meter, 0);
  if (recon_avg_resp_time_meter >= 0) rf_DestroyMeter(recon_avg_resp_time_meter, 0);
  rf_mutex_destroy(&iops_mutex);
}
/****************************************************************************************
* meter manipulation code
***************************************************************************************/
/* capacity of the meter table below */
#define MAXMETERS 50
/*
 * One entry per meter, indexed by the handle rf_CreateMeter returns:
 *   sd   -- connected socket descriptor; -1 when the slot holds no open meter
 *   pid  -- process id of the forked dmeter process
 *   name -- socket path passed to dmeter ("/tmp/xm_<n>")
 */
static struct meter_info { int sd; int pid; char name[100]; } minfo[MAXMETERS];
/* number of meters created so far; also the next handle to hand out */
static int meter_num = 0;
int rf_ConfigureMeters()
{
int i;
for (i=0; i<MAXMETERS; i++)
minfo[i].sd = -1;
return(0);
}
/* forks a dmeter process to create a 4-digit meter window
* "title" appears in the title bar of the meter window
* returns an integer handle (really a socket descriptor) by which
* the new meter can be accessed.
*/
static int rf_CreateMeter(title, geom, color)
char *title;
char *geom;
char *color;
{
char geombuf[100], *clr;
int sd, pid, i, status;
struct sockaddr sa;
if (!geom) sprintf(geombuf,"120x40-0+%d", 50*meter_num); else sprintf(geombuf, "%s", geom);
clr = (color) ? color : "black";
sprintf(minfo[meter_num].name,"/tmp/xm_%d",meter_num);
unlink(minfo[meter_num].name);
if ( !(pid = fork()) ) {
execlp("dmeter","dmeter","-noscroll","-t",title,"-geometry",geombuf,"-sa",minfo[meter_num].name,"-fg",clr,NULL);
perror("rf_CreateMeter: exec failed");
return(-1);
}
sd = socket(AF_UNIX,SOCK_STREAM,0);
sa.sa_family = AF_UNIX;
strcpy(sa.sa_data, minfo[meter_num].name);
for (i=0; i<50; i++) { /* this give us 25 seconds to get the meter running */
if ( (status = connect(sd,&sa,sizeof(sa))) != -1) break;
#ifdef SIMULATE
sleep (1);
#else /* SIMULATE */
RF_DELAY_THREAD(0, 500);
#endif /* SIMULATE */
}
if (status == -1) {
perror("Unable to connect to meter");
exit(1);
}
minfo[meter_num].sd = sd;
minfo[meter_num].pid = pid;
return(meter_num++);
}
/* causes the meter to display the given value */
void rf_UpdateMeter(meterid, value)
int meterid;
int value;
{
if (write(minfo[meterid].sd, &value, sizeof(int)) < sizeof(int)) {
fprintf(stderr,"Unable to write to meter %d\n",meterid);
}
}
void rf_DestroyMeter(meterid, killproc)
int meterid;
int killproc;
{
close(minfo[meterid].sd);
if (killproc) kill(minfo[meterid].pid, SIGTERM);
minfo[meterid].sd = -1;
}
/*
 * Destroy every meter whose socket is still open, without killing the
 * meter processes.  Always returns 0.
 */
int rf_ShutdownAllMeters()
{
  int id = 0;

  while (id < MAXMETERS) {
    if (minfo[id].sd >= 0) {
      rf_DestroyMeter(id, 0);
    }
    id++;
  }
  return(0);
}
#endif /* RF_DEMO > 0 */

View File

@ -0,0 +1,82 @@
/* $NetBSD: rf_demo.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_demo.h
* some constants for demo'ing software
*/
/* :
* Log: rf_demo.h,v
* Revision 1.8 1996/06/14 23:15:38 jimz
* attempt to deal with thread GC problem
*
* Revision 1.7 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.6 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.5 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.4 1995/12/01 15:58:53 root
* added copyright info
*
*/
#ifndef _RF__RF_DEMO_H_
#define _RF__RF_DEMO_H_
#include "rf_types.h"
/* geometry of the meter windows; used to build the geometry strings passed to the meter program */
#define RF_DEMO_METER_WIDTH 300 /* how wide each meter is */
#define RF_DEMO_METER_HEIGHT 150 /* how tall */
#define RF_DEMO_METER_SPACING 15 /* how much space between horizontally */
#define RF_DEMO_METER_VSPACE 20 /* how much space between vertically */
/* operating modes stored in rf_demo_op_mode */
#define RF_DEMO_FAULT_FREE 0
#define RF_DEMO_DEGRADED 1
#define RF_DEMO_RECON 2
/* create the I/O-rate and response-time meters and start updating them; C and G appear in the titles as "C/G" */
void rf_startup_iops_demo(int meter_vpos, int C, int G);
/* record one completed user I/O and its response time */
void rf_update_user_stats(int resptime);
/* add val to the running count of disk I/Os */
void rf_update_disk_iops(int val);
/* refresh the meters; runs as a looping thread outside the simulator */
void rf_meter_update_thread(void);
/* stop updating and destroy the meters created by rf_startup_iops_demo */
void rf_finish_iops_demo(void);
/* switch to one of the RF_DEMO_* modes, creating that mode's meters */
void rf_demo_update_mode(int arg_mode);
/* start the reconstruction demo; a nonzero init also creates the fault-free response-time meter */
void rf_startup_recon_demo(int meter_vpos, int C, int G, int init);
/* record the latest reconstruction progress value for display */
void rf_update_recon_meter(int val);
/* display the elapsed reconstruction time (etime->tv_sec) and tear the recon demo down */
void rf_finish_recon_demo(struct timeval *etime);
/* current operating mode, one of the RF_DEMO_* mode constants above */
extern int rf_demo_op_mode;
#endif /* !_RF__RF_DEMO_H_ */

180
sys/dev/raidframe/rf_desc.h Normal file
View File

@ -0,0 +1,180 @@
/* $NetBSD: rf_desc.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_desc.h,v
* Revision 1.29 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.28 1996/06/07 22:49:22 jimz
* fix up raidPtr typing
*
* Revision 1.27 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.26 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.25 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.24 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.23 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.22 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.21 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.20 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.19 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.18 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.17 1995/12/01 15:58:43 root
* added copyright info
*
* Revision 1.16 1995/11/19 16:31:30 wvcii
* descriptors now contain an array of dag lists as opposed to a dag header
*
* Revision 1.15 1995/11/07 16:24:17 wvcii
* updated def of _AccessState
*
*/
#ifndef _RF__RF_DESC_H_
#define _RF__RF_DESC_H_
#include "rf_archs.h"
#include "rf_types.h"
#include "rf_etimer.h"
#include "rf_dag.h"
/*
 * Descriptor for a reconstruction operation: identifies the array, the
 * failed disk and (when not using distributed sparing) the spare disk, and
 * tracks how far the reconstruction has progressed.
 */
struct RF_RaidReconDesc_s {
RF_Raid_t *raidPtr; /* raid device descriptor */
RF_RowCol_t row; /* row of failed disk */
RF_RowCol_t col; /* col of failed disk */
int state; /* how far along the reconstruction operation has gotten */
RF_RaidDisk_t *spareDiskPtr; /* describes target disk for recon (not used in dist sparing) */
int numDisksDone; /* the number of surviving disks that have completed their work */
RF_RowCol_t srow; /* row ID of the spare disk (not used in dist sparing) */
RF_RowCol_t scol; /* col ID of the spare disk (not used in dist sparing) */
#ifdef KERNEL
/*
* Prevent recon from hogging CPU
*/
RF_Etimer_t recon_exec_timer;
RF_uint64 reconExecTimerRunning;
RF_uint64 reconExecTicks;
RF_uint64 maxReconExecTicks;
#endif /* KERNEL */
#if RF_RECON_STATS > 0
/* statistics counters, present only when RF_RECON_STATS is enabled */
RF_uint64 hsStallCount; /* head sep stall count */
RF_uint64 numReconExecDelays;
RF_uint64 numReconEventWaits;
#endif /* RF_RECON_STATS > 0 */
RF_RaidReconDesc_t *next; /* link to another recon descriptor -- presumably a list link; usage not visible here */
};
/*
 * Descriptor for a single access to the array: what to transfer and where,
 * the sequence of states driving the access and the current position in
 * it, the DAGs and stripe map built for it, completion callback
 * information, and tracing/timing data.
 */
struct RF_RaidAccessDesc_s {
RF_Raid_t *raidPtr; /* raid device descriptor */
RF_IoType_t type; /* read or write */
RF_RaidAddr_t raidAddress; /* starting address in raid address space */
RF_SectorCount_t numBlocks; /* number of blocks (sectors) to transfer */
RF_StripeCount_t numStripes; /* number of stripes involved in access */
caddr_t bufPtr; /* pointer to data buffer */
#if !defined(KERNEL) && !defined(SIMULATE)
caddr_t obufPtr; /* real pointer to data buffer */
#endif /* !KERNEL && !SIMULATE */
RF_RaidAccessFlags_t flags; /* flags controlling operation */
int state; /* index into states telling how far along the RAID operation has gotten */
RF_AccessState_t *states; /* array of states to be run */
int status; /* pass/fail status of the last operation */
RF_DagList_t *dagArray; /* array of dag lists, one list per stripe */
RF_AccessStripeMapHeader_t *asmap; /* the asm for this I/O */
void *bp; /* buf pointer for this RAID acc. ignored outside the kernel */
RF_DagHeader_t **paramDAG; /* allows the DAG to be returned to the caller after I/O completion */
RF_AccessStripeMapHeader_t **paramASM; /* allows the ASM to be returned to the caller after I/O completion */
RF_AccTraceEntry_t tracerec; /* perf monitoring information for a user access (not for dag stats) */
void (*callbackFunc)(RF_CBParam_t); /* callback function for this I/O */
void *callbackArg; /* arg to give to callback func */
int tid; /* debug only, user-level only: thread id of thr that did this access */
RF_AllocListElem_t *cleanupList; /* memory to be freed at the end of the access*/
/* links to other access descriptors -- NOTE(review): list discipline is not visible in this header */
RF_RaidAccessDesc_t *next;
RF_RaidAccessDesc_t *head;
int numPending;
RF_DECLARE_MUTEX(mutex) /* these are used to implement blocking I/O */
RF_DECLARE_COND(cond)
#ifdef SIMULATE
/* simulator-only fields */
RF_Owner_t owner;
int async_flag;
#endif /* SIMULATE */
RF_Etimer_t timer; /* used for timing this access */
};
#endif /* !_RF__RF_DESC_H_ */

View File

@ -0,0 +1,290 @@
/* $NetBSD: rf_diskevent.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Rachad Youssef
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_diskevent.c - support for disk device, by managing a heap of future events
* adapted from original code by David Kotz, Song Bac Toh (1994)
*/
/* :
* Log: rf_diskevent.c,v
* Revision 1.18 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.17 1996/07/27 16:05:19 jimz
* return ENOMEM if DDEventInit fails its call to InitHeap
*
* Revision 1.16 1996/06/10 12:06:24 jimz
* fix spelling errors
*
* Revision 1.15 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.14 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.13 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.12 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.11 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.10 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.9 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1995/12/01 15:57:56 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_heap.h"
#include "rf_diskevent.h"
#include "rf_general.h"
#include "rf_dag.h"
#include "rf_diskthreads.h"
#include "rf_states.h"
#include "rf_shutdown.h"
/* trace printing can be turned on/off in the Makefile */
/* current simulated time; set to the key of each event as rf_ProcessEvent removes it from the heap */
RF_TICS_t rf_cur_time;
/* owner of the event being processed; see rf_GetCurrentOwner / rf_SetCurrentOwner */
static RF_Owner_t cur_owner;
/* heap of pending events, keyed by event time */
static RF_Heap_t heap;
/*
 * Shutdown-list callback registered by rf_DDEventInit: releases the event
 * heap.  The argument is unused.
 */
static void rf_DDEventShutdown(ignored)
  void *ignored;
{
  rf_FreeHeap(heap);
}
/* ======================================================================== */
/* DDEventInit
 *
 * Initialize the event heap and register its cleanup on the shutdown list.
 *
 * listp -- shutdown list on which rf_DDEventShutdown is registered
 *
 * Returns 0 on success, ENOMEM if the heap cannot be allocated, or the error
 * returned by rf_ShutdownCreate.  On failure no heap is left allocated and
 * the module-level heap pointer is cleared, so it never refers to freed
 * storage.
 */
int rf_DDEventInit(listp)
  RF_ShutdownList_t **listp;
{
  int rc;

  heap = rf_InitHeap(RF_HEAP_MAX); /* initialize the heap */
  if (heap == NULL)
    return(ENOMEM);
  rc = rf_ShutdownCreate(listp, rf_DDEventShutdown, NULL);
  if (rc) {
    RF_ERRORMSG3("RAIDFRAME: failed creating shutdown event file %s line %d rc=%d\n",
      __FILE__, __LINE__, rc);
    rf_FreeHeap(heap);
    heap = NULL;  /* don't leave a dangling pointer behind */
    return(rc);
  }
  rf_cur_time=(RF_TICS_t)0;
  return(0);
}
/* DDEventRequest
 *
 * Allocate an event record describing a completion callback and insert it
 * into the event heap, keyed by eventTime.  Panics if the record cannot be
 * allocated.
 *
 * The owner, (row, col), raidPtr and diskid values are stored in the record
 * for rf_ProcessEvent to use when the event is removed from the heap.
 */
void rf_DDEventRequest(
  RF_TICS_t     eventTime,
  int         (*CompleteFunc)(),
  void         *argument,
  RF_Owner_t    owner,
  RF_RowCol_t   row,
  RF_RowCol_t   col,
  RF_Raid_t    *raidPtr,
  void         *diskid)
{
  RF_HeapData_t *ev;

  RF_Malloc(ev,sizeof(RF_HeapData_t),(RF_HeapData_t *) );
  if (!ev) {
    fprintf(stderr, "DDEventRequest: malloc failed\n");
    RF_PANIC();
  }

  /* when the event fires and what to call */
  ev->eventTime    = eventTime;
  ev->CompleteFunc = CompleteFunc;
  ev->argument     = argument;

  /* who it is for and which disk it concerns */
  ev->owner   = owner;
  ev->raidPtr = raidPtr;
  ev->row     = row;
  ev->col     = col;
  ev->diskid  = diskid;

  rf_AddHeap(heap, ev, (ev->eventTime));
}
/*
 * Allocate an event record that carries an access descriptor rather than a
 * completion callback, and insert it into the event heap keyed by
 * eventTime.  Panics if the record cannot be allocated.
 *
 * rf_ProcessEvent continues the access in desc when the record's row equals
 * RF_DD_DAGEVENT_ROW.
 *
 * Every pointer field of the record is given a defined value here,
 * including diskid, so that nothing downstream depends on whether
 * RF_Malloc clears the memory it returns.
 */
void rf_DAGEventRequest(
  RF_TICS_t             eventTime,
  RF_Owner_t            owner,
  RF_RowCol_t           row,
  RF_RowCol_t           col,
  RF_RaidAccessDesc_t  *desc,
  RF_Raid_t            *raidPtr)
{
  RF_HeapData_t *hpdat;

  RF_Malloc(hpdat,sizeof(RF_HeapData_t),(RF_HeapData_t *) );
  if (hpdat == NULL) {
    /* name this function, not rf_DDEventRequest, so the failure can be located */
    fprintf(stderr, "DAGEventRequest: malloc failed\n");
    RF_PANIC();
  }
  hpdat->eventTime = eventTime;
  hpdat->CompleteFunc = NULL;
  hpdat->argument = NULL;
  hpdat->owner = owner;
  hpdat->row = row;
  hpdat->col = col;
  hpdat->desc=desc;
  hpdat->raidPtr = raidPtr;
  hpdat->diskid = NULL;
  rf_AddHeap(heap, hpdat, (hpdat->eventTime));
}
/* ------------------------------------------------------------------------ */
/* @SUBTITLE "Print out the request queue" */
/* There is only 1 request queue so no argument is needed for this
   function.

   The heap is printed by removing every event in turn, printing it, and
   re-inserting it into a freshly allocated heap, which then replaces the
   original.  If the scratch heap cannot be allocated, nothing is removed,
   so no pending event is lost. */
void rf_DDPrintRequests()
{
  RF_HeapData_t *Hpdat;
  RF_HeapKey_t Hpkey;
  RF_Heap_t tempHp;

  tempHp = rf_InitHeap(RF_HEAP_MAX);
  if (tempHp == NULL) {
    /* draining the real heap with nowhere to put the events would discard them */
    printf("rf_DDPrintRequests: unable to allocate scratch heap, events not printed\n");
    return;
  }

  printf("Events on heap:\n");
  while (rf_RemHeap(heap, &Hpdat, &Hpkey) != RF_HEAP_NONE)
  {
    printf ("at %5g HpKey there is: something for owner %d at disk %d %d\n",Hpkey,
            Hpdat->owner,Hpdat->row,Hpdat->col);
    rf_AddHeap(tempHp, Hpdat, Hpdat->eventTime);
  }
  printf("END heap:\n");
  rf_FreeHeap(heap); /* free the empty old heap */
  heap = tempHp; /* restore the recycled heap */
}
/* ------------------------------------------------------------------------ */
int rf_ProcessEvent()
{
RF_HeapData_t *Hpdat;
RF_HeapKey_t Hpkey;
int retcode;
retcode = rf_RemHeap(heap, &Hpdat, &Hpkey);
if (retcode==RF_HEAP_FOUND) {
if (rf_eventDebug) {
rf_DDPrintRequests();
printf ("Now processing: at %5g something for owner %d at disk %d %d\n",
Hpkey, Hpdat->owner, Hpdat->row, Hpdat->col);
}
rf_cur_time=Hpkey;
rf_SetCurrentOwner(Hpdat->owner);
if (Hpdat->row>=0) {/* ongoing dag event */
rf_SetDiskIdle (Hpdat->raidPtr, Hpdat->row, Hpdat->col);
if (Hpdat->diskid != NULL) {
rf_simulator_complete_io(Hpdat->diskid);
}
retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0);
if (retcode==RF_HEAP_FOUND)
(((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbFunc)(((RF_DagNode_t *) (Hpdat->argument))->dagHdr->cbArg);
RF_Free(Hpdat,sizeof(RF_HeapData_t));
return(retcode);
}
else {
/* this is a dag event or reconstruction event */
if (Hpdat->row==RF_DD_DAGEVENT_ROW){ /* dag event */
rf_ContinueRaidAccess(Hpdat->desc);
retcode = RF_FALSE;
RF_Free(Hpdat,sizeof(RF_HeapData_t));
return (RF_FALSE);
}
else {
/* recon event */
retcode=(Hpdat->CompleteFunc)(Hpdat->argument,0);
retcode = RF_FALSE;
RF_Free(Hpdat,sizeof(RF_HeapData_t));
return (RF_FALSE);
}
}
}
if (rf_eventDebug)
printf("HEAP is empty\n");
return(RF_DD_NOTHING_THERE);
}
/* Return the owner most recently recorded by rf_SetCurrentOwner. */
RF_Owner_t rf_GetCurrentOwner()
{
  return cur_owner;
}
/* Record owner as the current owner; rf_ProcessEvent does this for each event it handles. */
void rf_SetCurrentOwner(RF_Owner_t owner)
{
  cur_owner = owner;
}
/* Return the current simulated time (the key of the event most recently taken off the heap). */
RF_TICS_t rf_CurTime()
{
  return rf_cur_time;
}

View File

@ -0,0 +1,96 @@
/* $NetBSD: rf_diskevent.h,v 1.1 1998/11/13 04:20:28 oster Exp $ */
/*
* rf_diskevent.h
* Adapted from original code by David Kotz (1994)
*
* The disk-device module is event driven. This module keeps the event
* request mechanism, which is based on proteus SimRequests,
* abstracted away from the bulk of the disk device code.
*
* Functions
* DDEventInit
* DDEventRequest
* DDEventPrint
* DDEventCancel
*/
/* :
* Log: rf_diskevent.h,v
* Revision 1.10 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.9 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.8 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.7 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.6 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.5 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.4 1995/12/01 15:57:16 root
* added copyright info
*
*/
#ifndef _RF__RF_DISKEVENT_H_
#define _RF__RF_DISKEVENT_H_
#include "rf_types.h"
#include "rf_heap.h"
#ifndef __NetBSD__
#include "time.h"
#endif
/* returned by rf_ProcessEvent when the event heap is empty */
#define RF_DD_NOTHING_THERE (-1)
/* row value that marks an event as a DAG event (see rf_ProcessEvent) */
#define RF_DD_DAGEVENT_ROW (-3)
#define RF_DD_DAGEVENT_COL RF_DD_DAGEVENT_ROW
/* current simulated time, advanced by rf_ProcessEvent */
extern RF_TICS_t rf_cur_time;
/*
* list of disk-device request types,
* initialized in diskdevice.c,
* used in diskevent.c
*/
typedef void (*RF_DDhandler)(int disk, RF_TICS_t eventTime);
struct RF_dd_handlers_s {
RF_DDhandler handler; /* function implementing this event type */
char name[20]; /* name of that event type */
};
extern struct RF_dd_handlers_s rf_DDhandlers[];
/* create the event heap and register its cleanup on *listp; returns 0 or an error code */
int rf_DDEventInit(RF_ShutdownList_t **listp);
/* queue an event at eventTime whose processing calls CompleteFunc(argument, 0) */
void rf_DDEventRequest(RF_TICS_t eventTime, int (*CompleteFunc)(),
void *argument, RF_Owner_t owner, RF_RowCol_t row, RF_RowCol_t col,
RF_Raid_t *raidPtr, void *diskid);
/* queue an event at eventTime that carries the access descriptor desc */
void rf_DAGEventRequest(RF_TICS_t eventTime, RF_Owner_t owner,
RF_RowCol_t row, RF_RowCol_t col, RF_RaidAccessDesc_t *desc,
RF_Raid_t *raidPtr);
/* print every event currently on the heap */
void rf_DDPrintRequests(void);
/* process the earliest event; returns RF_DD_NOTHING_THERE when the heap is empty */
int rf_ProcessEvent(void);
/* accessors for the owner of the event being processed */
RF_Owner_t rf_GetCurrentOwner(void);
void rf_SetCurrentOwner(RF_Owner_t owner);
/* returns rf_cur_time */
RF_TICS_t rf_CurTime(void);
#endif /* !_RF__RF_DISKEVENT_H_ */

View File

@ -0,0 +1,924 @@
/* $NetBSD: rf_diskqueue.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/****************************************************************************************
*
* rf_diskqueue.c -- higher-level disk queue code
*
* the routines here are a generic wrapper around the actual queueing
* routines. The code here implements thread scheduling, synchronization,
* and locking ops (see below) on top of the lower-level queueing code.
*
* to support atomic RMW, we implement "locking operations". When a locking op
* is dispatched to the lower levels of the driver, the queue is locked, and no further
* I/Os are dispatched until the queue receives & completes a corresponding "unlocking
* operation". This code relies on the higher layers to guarantee that a locking
* op will always be eventually followed by an unlocking op. The model is that
* the higher layers are structured so locking and unlocking ops occur in pairs, i.e.
* an unlocking op cannot be generated until after a locking op reports completion.
* There is no good way to check to see that an unlocking op "corresponds" to the
* op that currently has the queue locked, so we make no such attempt. Since by
* definition there can be only one locking op outstanding on a disk, this should
* not be a problem.
*
* In the kernel, we allow multiple I/Os to be concurrently dispatched to the disk
* driver. In order to support locking ops in this environment, when we decide to
* do a locking op, we stop dispatching new I/Os and wait until all dispatched I/Os
* have completed before dispatching the locking op.
*
* Unfortunately, the code is different in the 3 different operating states
* (user level, kernel, simulator). In the kernel, I/O is non-blocking, and
* we have no disk threads to dispatch for us. Therefore, we have to dispatch
* new I/Os to the scsi driver at the time of enqueue, and also at the time
* of completion. At user level, I/O is blocking, and so only the disk threads
* may dispatch I/Os. Thus at user level, all we can do at enqueue time is
* enqueue and wake up the disk thread to do the dispatch.
*
***************************************************************************************/
/*
* :
*
* Log: rf_diskqueue.c,v
* Revision 1.50 1996/08/07 21:08:38 jimz
* b_proc -> kb_proc
*
* Revision 1.49 1996/07/05 20:36:14 jimz
* make rf_ConfigureDiskQueueSystem return 0
*
* Revision 1.48 1996/06/18 20:53:11 jimz
* fix up disk queueing (remove configure routine,
* add shutdown list arg to create routines)
*
* Revision 1.47 1996/06/14 14:16:36 jimz
* fix handling of bogus queue type
*
* Revision 1.46 1996/06/13 20:41:44 jimz
* add scan, cscan, random queueing
*
* Revision 1.45 1996/06/11 01:27:50 jimz
* Fixed bug where diskthread shutdown would crash or hang. This
* turned out to be two distinct bugs:
* (1) [crash] The thread shutdown code wasn't properly waiting for
* all the diskthreads to complete. This caused diskthreads that were
* exiting+cleaning up to unlock a destroyed mutex.
* (2) [hang] TerminateDiskQueues wasn't locking, and DiskIODequeue
* only checked for termination _after_ a wakeup if the queues were
* empty. This was a race where the termination wakeup could be lost
* by the dequeueing thread, and the system would hang waiting for the
* thread to exit, while the thread waited for an I/O or a signal to
* check the termination flag.
*
* Revision 1.44 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.43 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.42 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.41 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.40 1996/06/06 17:28:04 jimz
* track sector number of last I/O dequeued
*
* Revision 1.39 1996/06/06 01:14:13 jimz
* fix crashing bug when tracerec is NULL (ie, from copyback)
* initialize req->queue
*
* Revision 1.38 1996/06/05 19:38:32 jimz
* fixed up disk queueing types config
* added sstf disk queueing
* fixed exit bug on diskthreads (ref-ing bad mem)
*
* Revision 1.37 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.36 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.35 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.34 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.33 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.32 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.31 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.30 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.29 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.28 1996/05/20 16:14:29 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.27 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.26 1996/05/16 19:21:49 wvcii
* fixed typo in init_dqd
*
* Revision 1.25 1996/05/16 16:02:51 jimz
* switch to RF_FREELIST stuff for DiskQueueData
*
* Revision 1.24 1996/05/10 16:24:14 jimz
* new cvscan function names
*
* Revision 1.23 1996/05/01 16:27:54 jimz
* don't use ccmn bp management
*
* Revision 1.22 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.21 1995/12/01 15:59:59 root
* added copyright info
*
* Revision 1.20 1995/11/07 16:27:20 wvcii
* added Peek() function to diskqueuesw
* non-locking accesses are never blocked (assume clients enforce proper
* respect for lock acquisition)
*
* Revision 1.19 1995/10/05 18:56:52 jimz
* fix req handling in IOComplete
*
* Revision 1.18 1995/10/04 20:13:50 wvcii
* added asserts to monitor numOutstanding queueLength
*
* Revision 1.17 1995/10/04 07:43:52 wvcii
* queue->numOutstanding now valid for user & sim
* added queue->queueLength
* user tested & verified, sim untested
*
* Revision 1.16 1995/09/12 00:21:19 wvcii
* added support for tracing disk queue time
*
*/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_threadid.h"
#include "rf_raid.h"
#include "rf_diskqueue.h"
#include "rf_alloclist.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_configure.h"
#include "rf_general.h"
#include "rf_freelist.h"
#include "rf_debugprint.h"
#include "rf_shutdown.h"
#include "rf_cvscan.h"
#include "rf_sstf.h"
#include "rf_fifo.h"
#ifdef SIMULATE
#include "rf_diskevent.h"
#endif /* SIMULATE */
/*
 * Forward declarations.
 * ubc_bufget() is only declared for non-NetBSD builds; it is the buffer
 * allocator used by init_dqd() on those platforms.
 */
#if !defined(__NetBSD__)
extern struct buf *ubc_bufget();
#endif
/* freelist init/clean callbacks for RF_DiskQueueData_t objects (defined below) */
static int init_dqd(RF_DiskQueueData_t *);
static void clean_dqd(RF_DiskQueueData_t *);
/* shutdown-list callback that destroys the DiskQueueData freelist */
static void rf_ShutdownDiskQueueSystem(void *);
/* From rf_kintf.c */
/* called from the KERNEL paths of rf_DiskIOEnqueue/rf_DiskIOComplete to issue an I/O */
int rf_DispatchKernelIO(RF_DiskQueue_t *,RF_DiskQueueData_t *);
/*
 * Debug print helpers: emit the message only when rf_queueDebug is set.
 * rf_debug_printf() takes a format plus eight pointer-sized arguments, so
 * each macro pads the unused slots with NULL.
 *
 * The bodies are wrapped in do { } while (0) so that a use such as
 *     if (p) Dprintf4(...); else Dprintf3(...);
 * attaches the else to the caller's if rather than to the hidden
 * "if (rf_queueDebug)" inside the macro.  Arguments are parenthesized so
 * that the cast applies to the whole argument expression.
 */
#define Dprintf1(s,a) do { if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)(a)),NULL,NULL,NULL,NULL,NULL,NULL,NULL); } while (0)
#define Dprintf2(s,a,b) do { if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)(a)),(void *)((unsigned long)(b)),NULL,NULL,NULL,NULL,NULL,NULL); } while (0)
#define Dprintf3(s,a,b,c) do { if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)(a)),(void *)((unsigned long)(b)),(void *)((unsigned long)(c)),NULL,NULL,NULL,NULL,NULL); } while (0)
#define Dprintf4(s,a,b,c,d) do { if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)(a)),(void *)((unsigned long)(b)),(void *)((unsigned long)(c)),(void *)((unsigned long)(d)),NULL,NULL,NULL,NULL); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)(a)),(void *)((unsigned long)(b)),(void *)((unsigned long)(c)),(void *)((unsigned long)(d)),(void *)((unsigned long)(e)),NULL,NULL,NULL); } while (0)
#if !defined(KERNEL) && !defined(SIMULATE)
/*
 * User-level only: condition-variable handshake between enqueuers and the
 * disk threads.  numWaiting counts threads blocked in WAIT_DISK_QUEUE;
 * SIGNAL_DISK_QUEUE issues a signal only if at least one is waiting.
 * The _wh_ argument is not used by either macro.
 *
 * Both macros are wrapped in do { } while (0) so that "MACRO(...);" is a
 * single statement and can be used safely in an unbraced if/else.
 */
/* queue must be locked before invoking this */
#define SIGNAL_DISK_QUEUE(_q_,_wh_) \
  do { \
    if ( (_q_)->numWaiting > 0) { \
      (_q_)->numWaiting--; \
      RF_SIGNAL_COND( ((_q_)->cond) ); \
    } \
  } while (0)
/* queue must be locked before invoking this */
#define WAIT_DISK_QUEUE(_q_,_wh_) \
  do { \
    (_q_)->numWaiting++; \
    RF_WAIT_COND( ((_q_)->cond), ((_q_)->mutex) ); \
  } while (0)
#else /* !defined(KERNEL) && !defined(SIMULATE) */
/* kernel and simulator builds: no sleeping/waking, both macros expand to nothing */
#define SIGNAL_DISK_QUEUE(_q_,_wh_)
#define WAIT_DISK_QUEUE(_q_,_wh_)
#endif /* !defined(KERNEL) && !defined(SIMULATE) */
/*****************************************************************************************
*
* the disk queue switch defines all the functions used in the different queueing
* disciplines
* queue ID, init routine, enqueue routine, dequeue routine, peek routine, promote routine
*
****************************************************************************************/
/*
 * Table of available queueing disciplines.  Each entry lists, in order:
 * the discipline's name (matched against cfgPtr->diskQueueType in
 * rf_ConfigureDiskQueues), then its create, enqueue, dequeue, peek and
 * promote routines.  Entry 0 ("fifo") is the fallback used when the
 * configured name matches nothing.
 * "scan" and "cscan" reuse the sstf enqueue and promote routines; "random"
 * reuses the fifo create, enqueue and promote routines.
 */
static RF_DiskQueueSW_t diskqueuesw[] = {
{"fifo", /* FIFO */
rf_FifoCreate,
rf_FifoEnqueue,
rf_FifoDequeue,
rf_FifoPeek,
rf_FifoPromote},
{"cvscan", /* cvscan */
rf_CvscanCreate,
rf_CvscanEnqueue,
rf_CvscanDequeue,
rf_CvscanPeek,
rf_CvscanPromote },
{"sstf", /* shortest seek time first */
rf_SstfCreate,
rf_SstfEnqueue,
rf_SstfDequeue,
rf_SstfPeek,
rf_SstfPromote},
{"scan", /* SCAN (two-way elevator) */
rf_ScanCreate,
rf_SstfEnqueue,
rf_ScanDequeue,
rf_ScanPeek,
rf_SstfPromote},
{"cscan", /* CSCAN (one-way elevator) */
rf_CscanCreate,
rf_SstfEnqueue,
rf_CscanDequeue,
rf_CscanPeek,
rf_SstfPromote},
/* the random discipline is compiled in only for non-kernel builds that request it */
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
/* to make a point to Chris :-> */
{"random", /* random */
rf_FifoCreate,
rf_FifoEnqueue,
rf_RandomDequeue,
rf_RandomPeek,
rf_FifoPromote},
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
};
/* number of entries in diskqueuesw[] (a size_t expression) */
#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
/* freelist from which all RF_DiskQueueData_t objects in this file are drawn */
static RF_FreeList_t *rf_dqd_freelist;
/*
 * RF_MAX_FREE_DQD and RF_DQD_INC are passed to RF_FREELIST_CREATE, and
 * RF_DQD_INITIAL to RF_FREELIST_PRIME_INIT, in rf_ConfigureDiskQueueSystem.
 * NOTE(review): presumably max cached free objects / growth increment /
 * initial population -- confirm against rf_freelist.h.
 */
#define RF_MAX_FREE_DQD 256
#define RF_DQD_INC 16
#define RF_DQD_INITIAL 64
#ifdef __NetBSD__
#ifdef _KERNEL
#include <sys/buf.h>
#endif
#endif
/*
 * init_dqd -- freelist "init" callback for a RF_DiskQueueData_t.
 *
 * In kernel builds, attaches a struct buf to the object (dqd->bp): on NetBSD
 * it is malloc'ed from M_DEVBUF and zeroed, elsewhere it comes from
 * ubc_bufget().  In non-kernel builds nothing is allocated.
 *
 * Returns 0 on success, ENOMEM if the buffer could not be obtained.
 */
static int init_dqd(dqd)
  RF_DiskQueueData_t *dqd;
{
#ifdef KERNEL
#ifdef __NetBSD__
  /* XXX not sure if the following malloc is appropriate... probably not quite... */
  dqd->bp = (struct buf *) malloc( sizeof(struct buf), M_DEVBUF, M_NOWAIT);
  /*
   * M_NOWAIT allocations may fail, so the result must be checked
   * before it is touched.
   */
  if (dqd->bp == NULL) {
    return(ENOMEM);
  }
  memset(dqd->bp,0,sizeof(struct buf)); /* if you don't do it, nobody else will.. */
  /* XXX */
  /* printf("NEED TO IMPLEMENT THIS BETTER!\n"); */
#else
  dqd->bp = ubc_bufget();
  if (dqd->bp == NULL) {
    return(ENOMEM);
  }
#endif
#endif /* KERNEL */
  return(0);
}
/*
 * clean_dqd -- freelist "clean" callback for a RF_DiskQueueData_t.
 *
 * Releases the struct buf that init_dqd() attached in kernel builds,
 * using the matching release routine for the platform.  A no-op outside
 * the kernel.
 */
static void clean_dqd(dqd)
  RF_DiskQueueData_t *dqd;
{
#if defined(KERNEL) && defined(__NetBSD__)
  /* printf("NEED TO IMPLEMENT THIS BETTER(2)!\n"); */
  /* XXX ? */
  free( dqd->bp, M_DEVBUF );
#elif defined(KERNEL)
  ubc_buffree(dqd->bp);
#endif /* KERNEL */
}
/*
 * config_disk_queue -- initialize one RF_DiskQueue_t.
 *
 *   raidPtr        array that owns the queue (stored in the queue)
 *   diskqueue      the queue to fill in
 *   r, c           row and column of the disk; stored in the queue and, in
 *                  the NetBSD kernel, used to locate raid_cinfo[r][c]
 *   p              queueing-discipline switch entry to use
 *   sectPerDisk    handed to the discipline's Create routine
 *   dev            device number stored in the queue
 *   maxOutstanding stored as the queue's maxOutstanding limit
 *   listp          shutdown list, handed to Create and to the managed
 *                  mutex/cond constructors
 *   clList         allocation list, handed to Create
 *
 * Returns 0 on success, or the non-zero code from the mutex/cond
 * constructor that failed.
 */
static int config_disk_queue(
  RF_Raid_t           *raidPtr,
  RF_DiskQueue_t      *diskqueue,
  RF_RowCol_t          r,
  RF_RowCol_t          c,
  RF_DiskQueueSW_t    *p,
  RF_SectorCount_t     sectPerDisk,
  dev_t                dev,
  int                  maxOutstanding,
  RF_ShutdownList_t  **listp,
  RF_AllocListElem_t  *clList)
{
  int status;

  /* where the queue lives */
  diskqueue->raidPtr = raidPtr;
  diskqueue->row     = r;
  diskqueue->col     = c;
  diskqueue->dev     = dev;
#if defined(__NetBSD__) && defined(_KERNEL)
  diskqueue->rf_cinfo = &raidPtr->raid_cinfo[r][c];
#endif

  /* queueing discipline and its private header */
  diskqueue->qPtr = p;
  diskqueue->qHdr = (p->Create)(sectPerDisk, clList, listp);

  /* counters, limits and lock state all start out idle */
  diskqueue->maxOutstanding = maxOutstanding;
  diskqueue->numOutstanding = 0;
  diskqueue->queueLength    = 0;
  diskqueue->numWaiting     = 0;
  diskqueue->flags          = 0;
  diskqueue->curPriority    = RF_IO_NORMAL_PRIORITY;
  diskqueue->nextLockingOp  = NULL;
  diskqueue->unlockingOp    = NULL;

  status = rf_create_managed_mutex(listp, &diskqueue->mutex);
  if (status != 0) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, status);
    return(status);
  }

  status = rf_create_managed_cond(listp, &diskqueue->cond);
  if (status != 0) {
    RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
      __LINE__, status);
    return(status);
  }

  return(0);
}
/*
 * Shutdown-list callback registered by rf_ConfigureDiskQueueSystem (which
 * also calls it directly, with NULL, if registration fails).  Destroys the
 * DiskQueueData freelist, passing clean_dqd as the per-object clean routine.
 * The argument is unused.
 */
static void rf_ShutdownDiskQueueSystem(ignored)
void *ignored;
{
RF_FREELIST_DESTROY_CLEAN(rf_dqd_freelist,next,(RF_DiskQueueData_t *),clean_dqd);
}
/*
 * rf_ConfigureDiskQueueSystem -- one-time setup of the DiskQueueData freelist.
 *
 * Creates the freelist, registers rf_ShutdownDiskQueueSystem on the given
 * shutdown list, then primes the freelist with RF_DQD_INITIAL objects
 * (each run through init_dqd).
 *
 * Returns 0 on success, ENOMEM if the freelist could not be created, or the
 * code from rf_ShutdownCreate if registration failed (in which case the
 * freelist is torn down again before returning).
 */
int rf_ConfigureDiskQueueSystem(listp)
  RF_ShutdownList_t **listp;
{
  int status;

  RF_FREELIST_CREATE(rf_dqd_freelist, RF_MAX_FREE_DQD,
    RF_DQD_INC, sizeof(RF_DiskQueueData_t));
  if (rf_dqd_freelist == NULL) {
    return(ENOMEM);
  }

  status = rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
  if (status == 0) {
    /* registered for cleanup; now it is safe to populate the list */
    RF_FREELIST_PRIME_INIT(rf_dqd_freelist, RF_DQD_INITIAL,next,
      (RF_DiskQueueData_t *),init_dqd);
    return(0);
  }

  RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
    __FILE__, __LINE__, status);
  rf_ShutdownDiskQueueSystem(NULL);
  return(status);
}
#ifndef KERNEL
/* this is called prior to shutdown to wakeup everyone waiting on a disk queue
* and tell them to exit
*/
void rf_TerminateDiskQueues(raidPtr)
RF_Raid_t *raidPtr;
{
RF_RowCol_t r, c;
raidPtr->terminate_disk_queues = 1;
for (r=0; r<raidPtr->numRow; r++) {
for (c=0; c<raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0); c++) {
RF_LOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues");
RF_BROADCAST_COND(raidPtr->Queues[r][c].cond);
RF_UNLOCK_QUEUE_MUTEX(&raidPtr->Queues[r][c], "TerminateDiskQueues");
}
}
}
#endif /* !KERNEL */
/*
 * rf_ConfigureDiskQueues -- allocate and configure every disk queue of an array.
 *
 * Looks up cfgPtr->diskQueueType in diskqueuesw[] (falling back to entry 0
 * with an error message if the name is unknown), allocates a numRow-entry
 * row table plus one queue array per row (row 0 is sized to hold the spares
 * as well), and configures each regular queue followed by each spare queue.
 * All allocations are added to raidPtr->cleanupList.
 *
 * Returns 0 on success, ENOMEM if an allocation fails, or the error code of
 * the first config_disk_queue() call that fails.
 */
int rf_ConfigureDiskQueues(
  RF_ShutdownList_t **listp,
  RF_Raid_t *raidPtr,
  RF_Config_t *cfgPtr)
{
  RF_DiskQueue_t **queueRows, *spareQueues;
  RF_DiskQueueSW_t *sw;
  RF_RowCol_t row, col, spare;
  int status, idx;

  raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;

  /* find the requested discipline by name */
  sw = NULL;
  for (idx = 0; idx < NUM_DISK_QUEUE_TYPES && sw == NULL; idx++) {
    if (strcmp(diskqueuesw[idx].queueType, cfgPtr->diskQueueType) == 0) {
      sw = &diskqueuesw[idx];
    }
  }
  if (sw == NULL) {
    RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n",cfgPtr->diskQueueType, diskqueuesw[0].queueType);
    sw = &diskqueuesw[0];
  }

  /* table of per-row queue arrays */
  RF_CallocAndAdd(queueRows, raidPtr->numRow, sizeof(RF_DiskQueue_t *), (RF_DiskQueue_t **), raidPtr->cleanupList);
  if (queueRows == NULL) {
    return(ENOMEM);
  }
  raidPtr->Queues = queueRows;

  for (row = 0; row < raidPtr->numRow; row++) {
    /* only row 0 reserves room for the spare disks' queues */
    RF_CallocAndAdd(queueRows[row], raidPtr->numCol + ((row==0) ? raidPtr->numSpare : 0), sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *), raidPtr->cleanupList);
    if (queueRows[row] == NULL) {
      return(ENOMEM);
    }
    for (col = 0; col < raidPtr->numCol; col++) {
      status = config_disk_queue(raidPtr, &queueRows[row][col], row, col, sw,
                 raidPtr->sectorsPerDisk, raidPtr->Disks[row][col].dev,
                 cfgPtr->maxOutstandingDiskReqs, listp, raidPtr->cleanupList);
      if (status != 0) {
        return(status);
      }
    }
  }

  /* the spare queues sit directly after the regular columns of row 0 */
  spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
  for (spare = 0; spare < raidPtr->numSpare; spare++) {
    status = config_disk_queue(raidPtr, &spareQueues[spare],
               0, raidPtr->numCol+spare, sw,
               raidPtr->sectorsPerDisk,
               raidPtr->Disks[0][raidPtr->numCol+spare].dev,
               cfgPtr->maxOutstandingDiskReqs, listp,
               raidPtr->cleanupList);
    if (status != 0) {
      return(status);
    }
  }

  return(0);
}
/* Enqueue a disk I/O
*
* Unfortunately, we have to do things differently in the different
* environments (simulator, user-level, kernel).
* At user level, all I/O is blocking, so we have 1 or more threads/disk
* and the thread that enqueues is different from the thread that dequeues.
* In the kernel, I/O is non-blocking and so we'd like to have multiple
* I/Os outstanding on the physical disks when possible.
*
* when any request arrives at a queue, we have two choices:
* dispatch it to the lower levels
* queue it up
*
* kernel rules for when to do what:
* locking request: queue empty => dispatch and lock queue,
* else queue it
* unlocking req : always dispatch it
* normal req : queue empty => dispatch it & set priority
* queue not full & priority is ok => dispatch it
* else queue it
*
* user-level rules:
* always enqueue. In the special case of an unlocking op, enqueue
* in a special way that will cause the unlocking op to be the next
* thing dequeued.
*
* simulator rules:
* Do the same as at user level, with the sleeps and wakeups suppressed.
*/
/*
 * rf_DiskIOEnqueue -- hand a request to a disk queue.
 *
 *   queue  the target disk queue
 *   req    the request; its priority field is overwritten with pri
 *   pri    priority to enqueue/dispatch at
 *
 * Starts the request's queue-time timer (req->qtime), then, holding the
 * queue mutex, either dispatches the request immediately (kernel only) or
 * places it on the queue, following the rules in the comment above.
 * queueLength is incremented only for requests that are actually queued.
 */
void rf_DiskIOEnqueue(queue, req, pri)
RF_DiskQueue_t *queue;
RF_DiskQueueData_t *req;
int pri;
{
int tid;
RF_ETIMER_START(req->qtime);
rf_get_threadid(tid);
/* only NOP requests may carry zero sectors */
RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
req->priority = pri;
if (rf_queueDebug && (req->numSector == 0)) {
printf("Warning: Enqueueing zero-sector access\n");
}
#ifdef KERNEL
/*
* kernel
*/
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
/* locking request */
if (RF_LOCKING_REQ(req)) {
if (RF_QUEUE_EMPTY(queue)) {
/* nothing in the way: lock the queue and issue the op right now */
Dprintf3("Dispatching pri %d locking op to r %d c %d (queue empty)\n",pri,queue->row, queue->col);
RF_LOCK_QUEUE(queue);
rf_DispatchKernelIO(queue, req);
} else {
queue->queueLength++; /* increment count of number of requests waiting in this queue */
Dprintf3("Enqueueing pri %d locking op to r %d c %d (queue not empty)\n",pri,queue->row, queue->col);
req->queue = (void *)queue;
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
}
}
/* unlocking request */
else if (RF_UNLOCKING_REQ(req)) { /* we'll do the actual unlock when this I/O completes */
Dprintf3("Dispatching pri %d unlocking op to r %d c %d\n",pri,queue->row, queue->col);
RF_ASSERT(RF_QUEUE_LOCKED(queue));
rf_DispatchKernelIO(queue, req);
}
/* normal request */
else if (RF_OK_TO_DISPATCH(queue, req)) {
Dprintf3("Dispatching pri %d regular op to r %d c %d (ok to dispatch)\n",pri,queue->row, queue->col);
rf_DispatchKernelIO(queue, req);
} else {
queue->queueLength++; /* increment count of number of requests waiting in this queue */
Dprintf3("Enqueueing pri %d regular op to r %d c %d (not ok to dispatch)\n",pri,queue->row, queue->col);
req->queue = (void *)queue;
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
}
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
#else /* KERNEL */
/*
* user-level
*/
RF_LOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
queue->queueLength++; /* increment count of number of requests waiting in this queue */
/* unlocking request */
if (RF_UNLOCKING_REQ(req)) {
Dprintf4("[%d] enqueueing pri %d unlocking op & signalling r %d c %d\n", tid, pri, queue->row, queue->col);
RF_ASSERT(RF_QUEUE_LOCKED(queue) && queue->unlockingOp == NULL);
/* parked in its own slot, which rf_DiskIODequeue checks before the regular queue */
queue->unlockingOp = req;
}
/* locking and normal requests */
else {
req->queue = (void *)queue;
Dprintf5("[%d] enqueueing pri %d %s op & signalling r %d c %d\n", tid, pri,
(RF_LOCKING_REQ(req)) ? "locking" : "regular",queue->row,queue->col);
(queue->qPtr->Enqueue)(queue->qHdr, req, pri);
}
/* wake one waiting disk thread, if any (expands to nothing in the simulator) */
SIGNAL_DISK_QUEUE( queue, "DiskIOEnqueue");
RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOEnqueue" );
#endif /* KERNEL */
}
#if !defined(KERNEL) && !defined(SIMULATE)
/*
 * rf_BroadcastOnQueue -- user-level only: prod the threads waiting on a
 * queue so that they wake up and recheck it.
 *
 * Does nothing for queues whose maxOutstanding is 1 or less; otherwise
 * invokes SIGNAL_DISK_QUEUE maxOutstanding times (each invocation signals
 * only while numWaiting is still positive).
 */
void rf_BroadcastOnQueue(queue)
  RF_DiskQueue_t *queue;
{
  int n;

  if (queue->maxOutstanding <= 1) {
    return;
  }
  n = 0;
  while (n < queue->maxOutstanding) {
    SIGNAL_DISK_QUEUE(queue, "BroadcastOnQueue" );
    n++;
  }
}
#endif /* !KERNEL && !SIMULATE */
#ifndef KERNEL /* not used in kernel */
/*
 * rf_DiskIODequeue -- pull the next request off a disk queue (non-kernel
 * builds only).
 *
 * Order of preference, evaluated with the queue mutex held:
 *   1. a pending unlocking op (queue->unlockingOp);
 *   2. the head of the underlying queue -- but a locking request at the
 *      head is taken only if the queue is not currently locked.
 * If nothing can be taken, the user-level build sleeps on the queue and
 * retries; the simulator build returns NULL instead of blocking.  If the
 * array's terminate_disk_queues flag is set, NULL is returned.
 *
 * On success the bookkeeping is updated (queueLength, numOutstanding,
 * last_deq_sector, queue-time trace) and, for a locking request, the queue
 * is locked before the mutex is released.
 *
 * The debug if/else pairs are braced explicitly: the Dprintf macros expand
 * to an "if" statement, so an unbraced "if (p) Dprintf..; else Dprintf..;"
 * would attach the else to the macro's own test.
 */
RF_DiskQueueData_t *rf_DiskIODequeue(queue)
  RF_DiskQueue_t *queue;
{
  RF_DiskQueueData_t *p, *headItem;
  int tid;

  rf_get_threadid(tid);
  RF_LOCK_QUEUE_MUTEX( queue, "DiskIODequeue" );
  for (p=NULL; !p; ) {
    if (queue->unlockingOp) {
      /* unlocking request */
      RF_ASSERT(RF_QUEUE_LOCKED(queue));
      p = queue->unlockingOp;
      queue->unlockingOp = NULL;
      Dprintf4("[%d] dequeueing pri %d unlocking op r %d c %d\n", tid, p->priority, queue->row,queue->col);
    }
    else {
      headItem = (queue->qPtr->Peek)(queue->qHdr);
      if (headItem) {
        if (RF_LOCKING_REQ(headItem)) {
          /* locking request */
          if (!RF_QUEUE_LOCKED(queue)) {
            /* queue isn't locked, so dequeue the request & lock the queue */
            p = (queue->qPtr->Dequeue)( queue->qHdr );
            if (p) {
              Dprintf4("[%d] dequeueing pri %d locking op r %d c %d\n", tid, p->priority, queue->row, queue->col);
            }
            else {
              Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
            }
          }
          else {
            /* queue already locked, no dequeue occurs */
            Dprintf3("[%d] no dequeue -- queue is locked r %d c %d\n", tid, queue->row, queue->col);
            p = NULL;
          }
        }
        else {
          /* normal request, always dequeue and assume caller already has lock (if needed) */
          p = (queue->qPtr->Dequeue)( queue->qHdr );
          if (p) {
            Dprintf4("[%d] dequeueing pri %d regular op r %d c %d\n", tid, p->priority, queue->row, queue->col);
          }
          else {
            Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
          }
        }
      }
      else {
        Dprintf3("[%d] no dequeue -- raw queue empty r %d c %d\n", tid, queue->row, queue->col);
      }
    }
    /* shutdown in progress: hand back nothing, even if something was just taken */
    if (queue->raidPtr->terminate_disk_queues) {
      p = NULL;
      break;
    }
#ifdef SIMULATE
    break; /* in simulator, return NULL on empty queue instead of blocking */
#else /* SIMULATE */
    if (!p) {
      Dprintf3("[%d] nothing to dequeue: waiting r %d c %d\n", tid, queue->row, queue->col);
      WAIT_DISK_QUEUE( queue, "DiskIODequeue" );
    }
#endif /* SIMULATE */
  }
  if (p) {
    queue->queueLength--; /* decrement count of number of requests waiting in this queue */
    RF_ASSERT(queue->queueLength >= 0);
    queue->numOutstanding++;
    queue->last_deq_sector = p->sectorOffset;
    /* record the amount of time this request spent in the disk queue */
    RF_ETIMER_STOP(p->qtime);
    RF_ETIMER_EVAL(p->qtime);
    if (p->tracerec) {
      p->tracerec->diskqueue_us += RF_ETIMER_VAL_US(p->qtime);
    }
  }
  if (p && RF_LOCKING_REQ(p)) {
    RF_ASSERT(!RF_QUEUE_LOCKED(queue));
    Dprintf3("[%d] locking queue r %d c %d\n",tid,queue->row,queue->col);
    RF_LOCK_QUEUE(queue);
  }
  RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIODequeue" );
  return(p);
}
#else /* !KERNEL */
/*
 * rf_DiskIOComplete -- get the next set of I/Os started, kernel version only.
 *
 *   queue   the disk queue the completed request belonged to
 *   req     the request that just completed (the variable is reused below
 *           for the requests pulled off the queue)
 *   status  non-zero if the completed request failed
 *
 * With the queue mutex held: unlocks the queue if the completed request was
 * an unlocking op or a failed locking op, drops numOutstanding, and then
 * keeps dispatching queued requests until the queue is full, becomes
 * locked, runs dry, or yields a request that cannot be dispatched yet.
 */
void rf_DiskIOComplete(queue, req, status)
  RF_DiskQueue_t *queue;
  RF_DiskQueueData_t *req;
  int status;
{
  int done=0;

  RF_LOCK_QUEUE_MUTEX( queue, "DiskIOComplete" );
  /* unlock the queue:
     (1) after an unlocking req completes
     (2) after a locking req fails
  */
  if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) {
    Dprintf2("DiskIOComplete: unlocking queue at r %d c %d\n", queue->row, queue->col);
    RF_ASSERT(RF_QUEUE_LOCKED(queue) && (queue->unlockingOp == NULL));
    RF_UNLOCK_QUEUE(queue);
  }
  queue->numOutstanding--;
  RF_ASSERT(queue->numOutstanding >= 0);
  /* dispatch requests to the disk until we find one that we can't. */
  /* no reason to continue once we've filled up the queue */
  /* no reason to even start if the queue is locked */
  while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) {
    if (queue->nextLockingOp) {
      req = queue->nextLockingOp; queue->nextLockingOp = NULL;
      Dprintf3("DiskIOComplete: a pri %d locking req was pending at r %d c %d\n",req->priority,queue->row, queue->col);
    } else {
      req = (queue->qPtr->Dequeue)( queue->qHdr );
      /* Dequeue yields NULL on an empty queue: do not touch req->priority then */
      if (req) {
        Dprintf3("DiskIOComplete: extracting pri %d req from queue at r %d c %d\n",req->priority,queue->row, queue->col);
      }
    }
    if (req) {
      queue->queueLength--; /* decrement count of number of requests waiting in this queue */
      RF_ASSERT(queue->queueLength >= 0);
    }
    if (!req) {
      done=1;
    }
    else if (RF_LOCKING_REQ(req)) {
      if (RF_QUEUE_EMPTY(queue)) { /* dispatch it */
        Dprintf3("DiskIOComplete: dispatching pri %d locking req to r %d c %d (queue empty)\n",req->priority,queue->row, queue->col);
        RF_LOCK_QUEUE(queue);
        rf_DispatchKernelIO(queue, req);
        done = 1;
      } else { /* put it aside to wait for the queue to drain */
        Dprintf3("DiskIOComplete: postponing pri %d locking req to r %d c %d\n",req->priority,queue->row, queue->col);
        RF_ASSERT(queue->nextLockingOp == NULL);
        /*
         * The request is still waiting, just in nextLockingOp instead of
         * the queue proper.  Count it again (as the re-enqueue path below
         * does) so that the decrement performed when it is picked up from
         * nextLockingOp on a later completion is balanced; otherwise each
         * postponement would drive queueLength one lower.
         */
        queue->queueLength++;
        queue->nextLockingOp = req;
        done = 1;
      }
    } else if (RF_UNLOCKING_REQ(req)) { /* should not happen: unlocking ops should not get queued */
      RF_ASSERT(RF_QUEUE_LOCKED(queue)); /* support it anyway for the future */
      Dprintf3("DiskIOComplete: dispatching pri %d unl req to r %d c %d (SHOULD NOT SEE THIS)\n",req->priority,queue->row, queue->col);
      rf_DispatchKernelIO(queue, req);
      done = 1;
    } else if (RF_OK_TO_DISPATCH(queue, req)) {
      Dprintf3("DiskIOComplete: dispatching pri %d regular req to r %d c %d (ok to dispatch)\n",req->priority,queue->row, queue->col);
      rf_DispatchKernelIO(queue, req);
    } else { /* we can't dispatch it, so just re-enqueue it. */
      /* potential trouble here if disk queues batch reqs */
      Dprintf3("DiskIOComplete: re-enqueueing pri %d regular req to r %d c %d\n",req->priority,queue->row, queue->col);
      queue->queueLength++;
      (queue->qPtr->Enqueue)(queue->qHdr, req, req->priority);
      done = 1;
    }
  }
  RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOComplete" );
}
#endif /* !KERNEL */
/*
 * rf_DiskIOPromote -- ask a queue to promote the accesses tagged with the
 * given parity stripe ID / reconstruction unit from low priority to normal
 * priority.
 *
 * Promotion is optional for a queueing discipline.  When the queue's switch
 * entry has no Promote routine, nothing is done and -1 is returned;
 * otherwise the routine is invoked with the queue mutex held and its return
 * value is passed back to the caller.
 */
int rf_DiskIOPromote(queue, parityStripeID, which_ru)
  RF_DiskQueue_t *queue;
  RF_StripeNum_t parityStripeID;
  RF_ReconUnitNum_t which_ru;
{
  int result = -1;

  if (queue->qPtr->Promote) {
    RF_LOCK_QUEUE_MUTEX( queue, "DiskIOPromote" );
    result = (queue->qPtr->Promote)( queue->qHdr, parityStripeID, which_ru );
    RF_UNLOCK_QUEUE_MUTEX( queue, "DiskIOPromote" );
  }
  return(result);
}
/*
 * rf_CreateDiskQueueData -- obtain a request descriptor from the freelist
 * and fill it in from the arguments.
 *
 * The descriptor gets normal priority, no auxiliary function and no second
 * buffer.  The stored sector offset is ssect plus rf_protectedSectors.
 * The owner is recorded only in simulator builds and kb_proc only in
 * kernel builds.
 *
 * NOTE(review): the descriptor is used without a NULL check after
 * RF_FREELIST_GET_INIT -- confirm that the macro cannot leave it NULL.
 */
RF_DiskQueueData_t *rf_CreateDiskQueueData(
  RF_IoType_t               typ,
  RF_SectorNum_t            ssect,
  RF_SectorCount_t          nsect,
  caddr_t                   buf,
  RF_StripeNum_t            parityStripeID,
  RF_ReconUnitNum_t         which_ru,
  int                     (*wakeF)(void *,int),
  void                     *arg,
  RF_DiskQueueData_t       *next,
  RF_AccTraceEntry_t       *tracerec,
  void                     *raidPtr,
  RF_DiskQueueDataFlags_t   flags,
  void                     *kb_proc)
{
  RF_DiskQueueData_t *dqd;

  RF_FREELIST_GET_INIT(rf_dqd_freelist,dqd,next,(RF_DiskQueueData_t *),init_dqd);

  /* what to transfer */
  dqd->type           = typ;
  dqd->sectorOffset   = ssect + rf_protectedSectors;
  dqd->numSector      = nsect;
  dqd->buf            = buf;
  dqd->buf2           = NULL;

  /* which stripe / recon unit the access belongs to */
  dqd->parityStripeID = parityStripeID;
  dqd->which_ru       = which_ru;

  /* completion handling */
  dqd->CompleteFunc   = wakeF;
  dqd->AuxFunc        = NULL;
  dqd->argument       = arg;

  /* scheduling and bookkeeping */
  dqd->priority       = RF_IO_NORMAL_PRIORITY;
  dqd->flags          = flags;
  dqd->next           = next;
  dqd->tracerec       = tracerec;
  dqd->raidPtr        = raidPtr;
#ifdef SIMULATE
  dqd->owner          = rf_GetCurrentOwner();
#endif /* SIMULATE */
#ifdef KERNEL
  dqd->b_proc         = kb_proc;
#endif /* KERNEL */

  return(dqd);
}
/*
 * rf_CreateDiskQueueDataFull -- like rf_CreateDiskQueueData, but the caller
 * also supplies the priority, the auxiliary function and the second buffer.
 *
 * Obtains a request descriptor from the freelist and fills it in from the
 * arguments.  The stored sector offset is ssect plus rf_protectedSectors.
 * The owner is recorded only in simulator builds and kb_proc only in
 * kernel builds.
 *
 * NOTE(review): the descriptor is used without a NULL check after
 * RF_FREELIST_GET_INIT -- confirm that the macro cannot leave it NULL.
 */
RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(
  RF_IoType_t               typ,
  RF_SectorNum_t            ssect,
  RF_SectorCount_t          nsect,
  caddr_t                   buf,
  RF_StripeNum_t            parityStripeID,
  RF_ReconUnitNum_t         which_ru,
  int                     (*wakeF)(void *,int),
  void                     *arg,
  RF_DiskQueueData_t       *next,
  RF_AccTraceEntry_t       *tracerec,
  int                       priority,
  int                     (*AuxFunc)(void *,...),
  caddr_t                   buf2,
  void                     *raidPtr,
  RF_DiskQueueDataFlags_t   flags,
  void                     *kb_proc)
{
  RF_DiskQueueData_t *dqd;

  RF_FREELIST_GET_INIT(rf_dqd_freelist,dqd,next,(RF_DiskQueueData_t *),init_dqd);

  /* what to transfer */
  dqd->type           = typ;
  dqd->sectorOffset   = ssect + rf_protectedSectors;
  dqd->numSector      = nsect;
  dqd->buf            = buf;
  dqd->buf2           = buf2;

  /* which stripe / recon unit the access belongs to */
  dqd->parityStripeID = parityStripeID;
  dqd->which_ru       = which_ru;

  /* completion handling */
  dqd->CompleteFunc   = wakeF;
  dqd->AuxFunc        = AuxFunc;
  dqd->argument       = arg;

  /* scheduling and bookkeeping */
  dqd->priority       = priority;
  dqd->flags          = flags;
  dqd->next           = next;
  dqd->tracerec       = tracerec;
  dqd->raidPtr        = raidPtr;
#ifdef SIMULATE
  dqd->owner          = rf_GetCurrentOwner();
#endif /* SIMULATE */
#ifdef KERNEL
  dqd->b_proc         = kb_proc;
#endif /* KERNEL */

  return(dqd);
}
/*
 * Return a request descriptor obtained from rf_CreateDiskQueueData[Full]
 * to the freelist, passing clean_dqd as the clean routine.
 */
void rf_FreeDiskQueueData(p)
RF_DiskQueueData_t *p;
{
RF_FREELIST_FREE_CLEAN(rf_dqd_freelist,p,next,clean_dqd);
}

View File

@ -0,0 +1,310 @@
/* $NetBSD: rf_diskqueue.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* rf_diskqueue.h -- header file for disk queues
*
* see comments in rf_diskqueue.c
*
****************************************************************************************/
/*
*
* :
*
* Log: rf_diskqueue.h,v
* Revision 1.31 1996/08/07 21:08:49 jimz
* b_proc -> kb_proc (IRIX complained)
*
* Revision 1.30 1996/06/18 20:53:11 jimz
* fix up disk queueing (remove configure routine,
* add shutdown list arg to create routines)
*
* Revision 1.29 1996/06/13 20:38:19 jimz
* fix queue type in DiskQueueData
*
* Revision 1.28 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.27 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.26 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.25 1996/06/06 17:29:12 jimz
* track arm position of last I/O dequeued
*
* Revision 1.24 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.23 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.22 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.21 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.20 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.19 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.18 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.17 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.16 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.15 1996/05/10 19:39:31 jimz
* add prev pointer to DiskQueueData
*
* Revision 1.14 1996/05/10 16:24:04 jimz
* mark old defines as deprecated, add RF_ defines
*
* Revision 1.13 1995/12/01 15:59:04 root
* added copyright info
*
* Revision 1.12 1995/11/07 16:26:44 wvcii
* added Peek() function to diskqueuesw
*
* Revision 1.11 1995/10/05 02:33:15 jimz
* made queue lens longs (less instructions to read :-)
*
* Revision 1.10 1995/10/04 07:07:07 wvcii
* queue->numOutstanding now valid for user & sim
* user tested & verified, sim untested
*
* Revision 1.9 1995/09/12 00:21:37 wvcii
* added support for tracing disk queue time
*
* Revision 1.8 95/04/24 13:25:51 holland
* rewrite to move disk queues, recon, & atomic RMW to kernel
*
* Revision 1.6.10.2 1995/04/03 20:13:56 holland
* added numOutstanding and maxOutstanding to support moving
* disk queues into kernel code
*
* Revision 1.6.10.1 1995/04/03 20:03:56 holland
* initial checkin on branch
*
* Revision 1.6 1995/03/03 18:34:33 rachad
* Simulator mechanism added
*
* Revision 1.5 1995/03/01 20:25:48 holland
* kernelization changes
*
* Revision 1.4 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.3 1995/02/01 14:25:19 holland
* began changes for kernelization:
* changed all instances of mutex_t and cond_t to DECLARE macros
* converted configuration code to use config structure
*
* Revision 1.2 1994/11/29 20:36:02 danner
* Added symbolic constants for io_type (e.g,IO_TYPE_READ)
* and support for READ_OP_WRITE
*
*/
#ifndef _RF__RF_DISKQUEUE_H_
#define _RF__RF_DISKQUEUE_H_
#include "rf_threadstuff.h"
#include "rf_acctrace.h"
#include "rf_alloclist.h"
#include "rf_types.h"
#include "rf_etimer.h"
#if defined(__NetBSD__) && defined(_KERNEL)
#include "rf_netbsd.h"
#endif
#define RF_IO_NORMAL_PRIORITY 1
#define RF_IO_LOW_PRIORITY 0
/*
 * The data held by a disk queue entry: one I/O request targeted at one
 * disk queue.  Instances are created by rf_CreateDiskQueueData() /
 * rf_CreateDiskQueueDataFull() and released by rf_FreeDiskQueueData()
 * (prototypes at the end of this header).
 *
 * NOTE(review): the kernel-only members below are guarded by KERNEL while
 * other parts of this header test _KERNEL -- confirm both are defined
 * consistently for kernel builds, otherwise the structure size differs
 * between translation units.
 */
struct RF_DiskQueueData_s {
RF_SectorNum_t sectorOffset; /* sector offset into the disk */
RF_SectorCount_t numSector; /* number of sectors to read/write */
RF_IoType_t type; /* read/write/nop */
caddr_t buf; /* buffer pointer */
RF_StripeNum_t parityStripeID; /* the RAID parity stripe ID this access is for */
RF_ReconUnitNum_t which_ru; /* which RU within this parity stripe */
int priority; /* the priority of this request */
int (*CompleteFunc)(void *,int);/* function to be called upon completion */
int (*AuxFunc)(void *,...); /* function called upon completion of the first I/O of a Read_Op_Write pair*/
void *argument; /* argument to be passed to CompleteFunc */
#ifdef SIMULATE
RF_Owner_t owner; /* which task is responsible for this request */
#endif /* SIMULATE */
void *raidPtr; /* needed for simulation */
RF_AccTraceEntry_t *tracerec; /* perf mon only */
RF_Etimer_t qtime; /* perf mon only - time request is in queue */
long entryTime; /* NOTE(review): presumably when the request entered the queue -- units not visible here */
RF_DiskQueueData_t *next; /* list linkage; how it is used depends on the queue implementation */
RF_DiskQueueData_t *prev; /* list linkage (back pointer) */
caddr_t buf2; /* for read-op-write */
dev_t dev; /* the device number for in-kernel version */
RF_DiskQueue_t *queue; /* the disk queue to which this req is targeted */
RF_DiskQueueDataFlags_t flags; /* flags controlling operation */
#ifdef KERNEL
struct proc *b_proc; /* the b_proc from the original bp passed into the driver for this I/O */
struct buf *bp; /* a bp to use to get this I/O done */
#endif /* KERNEL */
};
/* bits of RF_DiskQueueData_s.flags, tested by RF_LOCKING_REQ() and RF_UNLOCKING_REQ() */
#define RF_LOCK_DISK_QUEUE 0x01
#define RF_UNLOCK_DISK_QUEUE 0x02
/*
 * Switch (function table) describing one disk queueing discipline.
 * Every routine other than Create takes the type-specific queue header
 * as its first (void *) argument.
 *
 * note: "Create" returns type-specific queue header pointer cast to (void *)
 */
struct RF_DiskQueueSW_s {
RF_DiskQueueType_t queueType;
void *(*Create)(RF_SectorCount_t, RF_AllocListElem_t *, RF_ShutdownList_t **); /* creation routine -- one call per queue in system */
void (*Enqueue)(void *,RF_DiskQueueData_t * ,int); /* enqueue routine */
RF_DiskQueueData_t *(*Dequeue)(void *); /* dequeue routine */
RF_DiskQueueData_t *(*Peek)(void *); /* peek at head of queue */
/* the rest are optional: they improve performance, but the driver will deal with it if they don't exist */
int (*Promote)(void *, RF_StripeNum_t, RF_ReconUnitNum_t); /* promotes priority of tagged accesses */
};
/*
 * Per-disk queue state: the queueing discipline in use (qPtr/qHdr) plus
 * the bookkeeping consulted by the RF_QUEUE_* and RF_OK_TO_DISPATCH
 * macros defined below in this header.
 */
struct RF_DiskQueue_s {
RF_DiskQueueSW_t *qPtr; /* access point to queue functions */
void *qHdr; /* queue header, of whatever type */
RF_DECLARE_MUTEX(mutex) /* mutex locking data structures */
RF_DECLARE_COND(cond) /* condition variable for synchronization */
long numOutstanding; /* number of I/Os currently outstanding on disk */
long maxOutstanding; /* max # of I/Os that can be outstanding on a disk (in-kernel only) */
int curPriority; /* the priority of all accs that are currently outstanding */
long queueLength; /* number of requests in queue */
RF_DiskQueueData_t *nextLockingOp; /* a locking op that has arrived at the head of the queue & is waiting for drainage */
RF_DiskQueueData_t *unlockingOp; /* used at user level to communicate unlocking op b/w user (or dag exec) & disk threads */
int numWaiting; /* number of threads waiting on this variable. user-level only */
RF_DiskQueueFlags_t flags; /* terminate, locked */
RF_Raid_t *raidPtr; /* associated array */
dev_t dev; /* device number for kernel version */
RF_SectorNum_t last_deq_sector; /* last sector number dequeued or dispatched */
int row, col; /* debug only */
#if defined(__NetBSD__) && defined(_KERNEL)
struct raidcinfo *rf_cinfo; /* disks component info.. */
#endif
};
/* queue flag bit: no new accs allowed until queue is explicitly unlocked */
#define RF_DQ_LOCKED 0x02

/*
 * macros setting & returning information about queues and requests
 *
 * _q / _q_ is a pointer to a disk queue, _r / _r_ a pointer to a request.
 * All of these evaluate their arguments more than once in some cases, so
 * do not pass expressions with side effects.
 */
#define RF_QUEUE_LOCKED(_q) ((_q)->flags & RF_DQ_LOCKED)
/* "empty" means: nothing outstanding, no locking op pending, and not locked */
#define RF_QUEUE_EMPTY(_q) (((_q)->numOutstanding == 0) && ((_q)->nextLockingOp == NULL) && !RF_QUEUE_LOCKED(_q))
#define RF_QUEUE_FULL(_q) ((_q)->numOutstanding == (_q)->maxOutstanding)

/*
 * The whole expansion is parenthesized so that the macros remain a single
 * expression wherever they are used (e.g. as the operand of ?: or !).
 * Their value is the updated flags word.
 */
#define RF_LOCK_QUEUE(_q) ((_q)->flags |= RF_DQ_LOCKED)
#define RF_UNLOCK_QUEUE(_q) ((_q)->flags &= ~RF_DQ_LOCKED)

/* the second argument (_wh_) is accepted but not used */
#define RF_LOCK_QUEUE_MUTEX(_q_,_wh_) RF_LOCK_MUTEX((_q_)->mutex)
#define RF_UNLOCK_QUEUE_MUTEX(_q_,_wh_) RF_UNLOCK_MUTEX((_q_)->mutex)

#define RF_LOCKING_REQ(_r) ((_r)->flags & RF_LOCK_DISK_QUEUE)
#define RF_UNLOCKING_REQ(_r) ((_r)->flags & RF_UNLOCK_DISK_QUEUE)

/*
 * whether it is ok to dispatch a regular request: either the queue is
 * empty, or it is not full and the request's priority is at least that
 * of the accesses currently outstanding
 */
#define RF_OK_TO_DISPATCH(_q_,_r_) \
  (RF_QUEUE_EMPTY(_q_) || \
  (!RF_QUEUE_FULL(_q_) && ((_r_)->priority >= (_q_)->curPriority)))
/*
 * Disk queue entry points.  The definitions are not part of this header;
 * the notes below only describe what the declarations themselves show.
 */
int rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp);
void rf_TerminateDiskQueues(RF_Raid_t *raidPtr);
int rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
void rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri);
/* declared only for user-level, non-simulator builds */
#if !defined(KERNEL) && !defined(SIMULATE)
void rf_BroadcastOnQueue(RF_DiskQueue_t *queue);
#endif /* !KERNEL && !SIMULATE */
/* non-kernel builds get rf_DiskIODequeue(); kernel builds get rf_DiskIOComplete() instead */
#ifndef KERNEL
RF_DiskQueueData_t *rf_DiskIODequeue(RF_DiskQueue_t *queue);
#else /* !KERNEL */
void rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status);
#endif /* !KERNEL */
int rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
RF_ReconUnitNum_t which_ru);
/*
 * Request constructors.  The "Full" variant additionally takes the
 * priority, the AuxFunc callback and the second buffer (buf2); the
 * parameter names correspond to members of RF_DiskQueueData_s.
 * NOTE(review): kb_proc is passed as void *; presumably the struct proc
 * stored in b_proc for kernel builds -- confirm in rf_diskqueue.c.
 */
RF_DiskQueueData_t *rf_CreateDiskQueueData(RF_IoType_t typ,
RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf,
RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru,
int (*wakeF)(void *, int),
void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec,
void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc);
RF_DiskQueueData_t *rf_CreateDiskQueueDataFull(RF_IoType_t typ,
RF_SectorNum_t ssect, RF_SectorCount_t nsect, caddr_t buf,
RF_StripeNum_t parityStripeID, RF_ReconUnitNum_t which_ru,
int (*wakeF)(void *, int),
void *arg, RF_DiskQueueData_t *next, RF_AccTraceEntry_t *tracerec,
int priority, int (*AuxFunc)(void *,...), caddr_t buf2,
void *raidPtr, RF_DiskQueueDataFlags_t flags, void *kb_proc);
void rf_FreeDiskQueueData(RF_DiskQueueData_t *p);
#endif /* !_RF__RF_DISKQUEUE_H_ */

View File

@ -0,0 +1,632 @@
/* $NetBSD: rf_disks.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/***************************************************************
* rf_disks.c -- code to perform operations on the actual disks
***************************************************************/
/* :
* Log: rf_disks.c,v
* Revision 1.32 1996/07/27 18:40:24 jimz
* cleanup sweep
*
* Revision 1.31 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.30 1996/07/19 16:11:21 jimz
* pass devname to DoReadCapacity
*
* Revision 1.29 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.28 1996/07/10 22:28:38 jimz
* get rid of obsolete row statuses (dead,degraded2)
*
* Revision 1.27 1996/06/10 12:06:14 jimz
* don't do any SCSI op stuff in simulator at all
*
* Revision 1.26 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.25 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.24 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.23 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.22 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.21 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.20 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.19 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.18 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.17 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.16 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.15 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.14 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.13 1996/05/02 14:57:43 jimz
* initialize sectorMask
*
* Revision 1.12 1995/12/01 15:57:04 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_alloclist.h"
#include "rf_utils.h"
#include "rf_configure.h"
#include "rf_general.h"
#if !defined(__NetBSD__)
#include "rf_camlayer.h"
#endif
#include "rf_options.h"
#include "rf_sys.h"
#if defined(__NetBSD__) && defined(_KERNEL)
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
int raidlookup __P((char *, struct proc *p, struct vnode **));
#endif
#ifdef SIMULATE
/*
 * Simulator-only configuration strings, set by rf_default_disk_names(),
 * rf_set_disk_db_name() and rf_set_disk_type_name() at the end of this file.
 */
static char disk_db_file_name[120], disk_type_name[120];
static double init_offset;
#endif /* SIMULATE */

/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.  The
 * digit is the total number of macro arguments (format string included).
 * They are wrapped in do { } while (0) so that each use is exactly one
 * statement and cannot capture an "else" belonging to the caller.
 */
#define DPRINTF6(a,b,c,d,e,f) do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
#include "rf_ccmn.h"
/****************************************************************************************
 *
 * initialize the disks comprising the array
 *
 * We want the spare disks to have regular row,col numbers so that we can easily
 * substitute a spare for a failed disk.  But, the driver code assumes throughout
 * that the array contains numRow by numCol _non-spare_ disks, so it's not clear
 * how to fit in the spares.  This is an unfortunate holdover from raidSim.  The
 * quick and dirty fix is to make row zero bigger than the rest, and put all the
 * spares in it.  This probably needs to get changed eventually.
 *
 * On success returns 0 and fills in raidPtr->Disks, sectorsPerDisk,
 * logBytesPerSector, bytesPerSector and sectorMask.  On failure returns a
 * non-zero error code (ENOMEM, EINVAL, or whatever rf_ConfigureDisk() or the
 * SCSI allocation routines returned).  In the NetBSD kernel build, any
 * component vnodes recorded so far are closed before returning an error.
 *
 * listp is not used here; memory is registered on raidPtr->cleanupList.
 *
 ***************************************************************************************/
int rf_ConfigureDisks(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  RF_RaidDisk_t **disks;
  RF_SectorCount_t min_numblks = (RF_SectorCount_t)0x7FFFFFFFFFFFLL;
  RF_RowCol_t r, c;
  int bs, ret;
  unsigned i, count, foundone=0, numFailuresThisRow;
  RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
#if defined(__NetBSD__) && defined(_KERNEL)
  struct proc *proc = 0;
  /*
   * Set only once this call has allocated the row-pointer array, so the
   * failure path never looks at component info that does not exist yet.
   */
  struct raidcinfo **cinfo = NULL;
#endif

#ifndef SIMULATE
#ifndef __NetBSD__
  ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
  if (ret)
    goto fail;
  ret = rf_SCSI_AllocTUR(&tur_op);
  if (ret)
    goto fail;
#endif /* !__NetBSD__ */
#endif /* !SIMULATE */

  RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList);
  if (disks == NULL) {
    ret = ENOMEM;
    goto fail;
  }
  raidPtr->Disks = disks;

#if defined(__NetBSD__) && defined(_KERNEL)
  proc = raidPtr->proc; /* Blah XXX */

  /* get space for the device-specific stuff... */
  RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
                  sizeof(struct raidcinfo *), (struct raidcinfo **),
                  raidPtr->cleanupList);
  if (raidPtr->raid_cinfo == NULL) {
    ret = ENOMEM;
    goto fail;
  }
  cinfo = raidPtr->raid_cinfo;
#endif

  for (r=0; r<raidPtr->numRow; r++) {
    numFailuresThisRow = 0;
    /* row zero is widened to hold the spares as well (see above) */
    RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList);
    if (disks[r] == NULL) {
      ret = ENOMEM;
      goto fail;
    }

#if defined(__NetBSD__) && defined(_KERNEL)
    /* get more space for device specific stuff.. */
    RF_CallocAndAdd(raidPtr->raid_cinfo[r],
                    raidPtr->numCol + ((r==0) ? raidPtr->numSpare : 0),
                    sizeof(struct raidcinfo), (struct raidcinfo *),
                    raidPtr->cleanupList);
    if (raidPtr->raid_cinfo[r] == NULL) {
      ret = ENOMEM;
      goto fail;
    }
#endif

    for (c=0; c<raidPtr->numCol; c++) {
      ret = rf_ConfigureDisk(raidPtr,&cfgPtr->devnames[r][c][0],
                             &disks[r][c], rdcap_op, tur_op,
                             cfgPtr->devs[r][c],r,c);
      if (ret)
        goto fail;
      if (disks[r][c].status != rf_ds_optimal) {
        numFailuresThisRow++;
      }
      else {
        /* only live disks take part in the minimum-size computation */
        if (disks[r][c].numBlocks < min_numblks)
          min_numblks = disks[r][c].numBlocks;
        DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
                 r,c,disks[r][c].devname,
                 (long int) disks[r][c].numBlocks,
                 disks[r][c].blockSize,
                 (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024);
      }
    }
    /* XXX fix for n-fault tolerant */
    if (numFailuresThisRow > 0)
      raidPtr->status[r] = rf_rs_degraded;
  }

#ifndef SIMULATE
#if defined(__NetBSD__) && defined(_KERNEL)
  /* we do nothing */
#else
  rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
  rf_SCSI_FreeDiskOp(tur_op, 0);   tur_op   = NULL;
#endif
#endif /* !SIMULATE */

  /* all disks must be the same size & have the same block size, bs must be a power of 2 */
  bs = 0;
  /* take the block size of the first live disk as the reference value */
  for (foundone=r=0; !foundone && r<raidPtr->numRow; r++) {
    for (c=0; !foundone && c<raidPtr->numCol; c++) {
      if (disks[r][c].status == rf_ds_optimal) {
        bs = disks[r][c].blockSize;
        foundone = 1;
      }
    }
  }
  if (!foundone) {
    RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    ret = EINVAL;
    goto fail;
  }

  /* a power of two has exactly one bit set */
  for (count=0,i=1; i; i<<=1) {
    if (bs & i)
      count++;
  }
  if (count != 1) {
    RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n",bs);
    ret = EINVAL;
    goto fail;
  }

  for (r=0; r<raidPtr->numRow; r++) {
    for (c=0; c<raidPtr->numCol; c++) {
      if (disks[r][c].status == rf_ds_optimal) {
        if (disks[r][c].blockSize != bs) {
          RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n",r,c);
          ret = EINVAL;
          goto fail;
        }
        if (disks[r][c].numBlocks != min_numblks) {
          RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
                       r,c,(int) min_numblks);
          disks[r][c].numBlocks = min_numblks;
        }
      }
    }
  }

  raidPtr->sectorsPerDisk = min_numblks;
  raidPtr->logBytesPerSector = ffs(bs) - 1;
  raidPtr->bytesPerSector = bs;
  raidPtr->sectorMask = bs-1;
  return(0);

fail:
#ifndef SIMULATE
#if defined(__NetBSD__) && defined(_KERNEL)
  /*
   * Close whatever components were opened.  We can get here before the
   * component-info arrays exist, or with only some rows allocated, so
   * check each level before dereferencing it.
   * NOTE(review): this relies on RF_CallocAndAdd() returning zeroed
   * memory (unallocated rows NULL, untouched ci_vp NULL) -- confirm.
   */
  if (cinfo != NULL) {
    for (r=0; r<raidPtr->numRow; r++) {
      if (cinfo[r] == NULL)
        continue;
      for (c=0; c<raidPtr->numCol; c++) {
        /* Cleanup.. */
#ifdef DEBUG
        printf("Cleaning up row: %d col: %d\n",r,c);
#endif
        if (cinfo[r][c].ci_vp) {
          (void)vn_close(cinfo[r][c].ci_vp,
                         FREAD|FWRITE, proc->p_ucred, proc);
          /* do not leave a pointer to a closed vnode behind */
          cinfo[r][c].ci_vp = NULL;
        }
      }
    }
  }
  /* Space allocated for raid_vpp will get cleaned up at some other point */
  /* XXX Need more #ifdefs in the above... */
#else
  if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
  if (tur_op)   rf_SCSI_FreeDiskOp(tur_op, 0);
#endif
#endif /* !SIMULATE */
  return(ret);
}
/****************************************************************************************
 * set up the data structures describing the spare disks in the array
 * recall from the above comment that the spare disk descriptors are stored
 * in row zero, which is specially expanded to hold them.
 *
 * Must run after rf_ConfigureDisks(): it uses raidPtr->Disks[0],
 * raidPtr->logBytesPerSector and raidPtr->sectorsPerDisk, which that
 * routine sets up.
 *
 * Returns 0 on success.  On failure returns a non-zero error code (EINVAL
 * for a block-size mismatch or an undersized spare, otherwise whatever
 * rf_ConfigureDisk() or the SCSI allocation routines returned); in the
 * NetBSD kernel build the spare components' vnodes are closed first.
 *
 * listp is not used here.
 ***************************************************************************************/
int rf_ConfigureSpareDisks(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  int i, ret;
  RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
  unsigned bs;
  RF_RaidDisk_t *disks;
#if defined(__NetBSD__) && defined(_KERNEL)
  struct proc *proc;
#endif

#ifndef SIMULATE
#ifndef __NetBSD__
  ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
  if (ret)
    goto fail;
  ret = rf_SCSI_AllocTUR(&tur_op);
  if (ret)
    goto fail;
#endif /* !__NetBSD__ */
#endif /* !SIMULATE */

#if defined(__NetBSD__) && defined(_KERNEL)
  proc = raidPtr->proc;
  /* The space for the spares should have already been
     allocated by ConfigureDisks() */
#endif

  /* spares live in row zero, directly after the numCol regular disks */
  disks = &raidPtr->Disks[0][raidPtr->numCol];
  for (i=0; i<raidPtr->numSpare; i++) {
    ret = rf_ConfigureDisk(raidPtr,&cfgPtr->spare_names[i][0],
                           &disks[i], rdcap_op, tur_op,
                           cfgPtr->spare_devs[i],0,raidPtr->numCol+i);
    if (ret)
      goto fail;
    if (disks[i].status != rf_ds_optimal) {
      /* report the name rf_ConfigureDisk() recorded for this spare */
      RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",disks[i].devname);
    } else {
      disks[i].status = rf_ds_spare;      /* change status to spare */
      DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",i,
               disks[i].devname,
               (long int) disks[i].numBlocks,disks[i].blockSize,
               (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024);
    }
  }

#ifndef SIMULATE
#if defined(__NetBSD__) && defined(_KERNEL)
  /* nothing to release in the NetBSD kernel build */
#else
  rf_SCSI_FreeDiskOp(rdcap_op, 1); rdcap_op = NULL;
  rf_SCSI_FreeDiskOp(tur_op, 0);   tur_op   = NULL;
#endif
#endif /* !SIMULATE */

  /* check sizes and block sizes on spare disks */
  bs = 1 << raidPtr->logBytesPerSector;
  for (i=0; i<raidPtr->numSpare; i++) {
    if (disks[i].blockSize != bs) {
      RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n",disks[i].blockSize, disks[i].devname, bs);
      ret = EINVAL;
      goto fail;
    }
    if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
      /* report the spare's block count (not its block size) */
      RF_ERRORMSG3("Spare disk %s (%ld blocks) is too small to serve as a spare (need %ld blocks)\n",
                   disks[i].devname, (long int) disks[i].numBlocks, (long int)raidPtr->sectorsPerDisk);
      ret = EINVAL;
      goto fail;
    } else if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
      RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n",disks[i].devname, (long int) raidPtr->sectorsPerDisk);
      disks[i].numBlocks = raidPtr->sectorsPerDisk;
    }
  }

  return(0);

fail:
#ifndef SIMULATE
#if defined(__NetBSD__) && defined(_KERNEL)
  for (i=0; i<raidPtr->numSpare; i++) {
    /* Cleanup.. */
#ifdef DEBUG
    printf("Cleaning up spare: %d\n",i);
#endif
    if (raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp) {
      (void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp,
                     FREAD|FWRITE, proc->p_ucred, proc);
      /* do not leave a pointer to a closed vnode behind */
      raidPtr->raid_cinfo[0][raidPtr->numCol+i].ci_vp = NULL;
    }
  }
#else
  if (rdcap_op) rf_SCSI_FreeDiskOp(rdcap_op, 1);
  if (tur_op)   rf_SCSI_FreeDiskOp(tur_op, 0);
#endif
#endif /* !SIMULATE */
  return(ret);
}
/*
 * configure a single disk in the array
 *
 * buf holds the device name as read from the configuration; leading white
 * space is skipped and one trailing newline is stripped IN PLACE before
 * the name is copied into diskPtr->devname.  The remaining work depends on
 * the build:
 *   - SIMULATE: initialise the simulated disk and read its geometry;
 *   - non-NetBSD: open the SCSI unit, issue TUR and READ CAPACITY;
 *   - NetBSD kernel: look the component up, query its partition info and
 *     record its vnode and device number in raidPtr->raid_cinfo[row][col].
 *
 * Returns 0 on success -- including the case where the disk was found to
 * be dead and marked rf_ds_failed -- and a non-zero error code otherwise
 * (EINVAL if the name does not fit in diskPtr->devname).
 */
int rf_ConfigureDisk(
  RF_Raid_t      *raidPtr,   /* We need this down here too!! GO */
  char           *buf,
  RF_RaidDisk_t  *diskPtr,
  RF_DiskOp_t    *rdcap_op,
  RF_DiskOp_t    *tur_op,
  dev_t           dev,       /* device number used only in kernel */
  RF_RowCol_t     row,
  RF_RowCol_t     col)
{
  char *p;
  size_t len;
#ifdef SIMULATE
  double init_offset;
#else /* SIMULATE */
#ifndef __NetBSD__
  int busid, targid, lun, retcode;
#endif /* !__NetBSD__ */
#endif /* SIMULATE */
#if defined(__NetBSD__) && defined(_KERNEL)
  struct partinfo dpart;
  struct vnode *vp;
  struct vattr va;
  struct proc *proc;
  int error;
#endif

  p = rf_find_non_white(buf);
  len = strlen(p);
  if (len > 0 && p[len-1] == '\n') {
    /* strip off the newline */
    p[--len] = '\0';
  }
  /* refuse names that would overflow the fixed-size devname field */
  if (len >= sizeof(diskPtr->devname)) {
    RF_ERRORMSG1("Device name %s is too long\n",p);
    return(EINVAL);
  }
  (void) strcpy(diskPtr->devname, p);

#ifdef SIMULATE

  init_offset = 0.0;
  rf_InitDisk(&diskPtr->diskState, disk_db_file_name,diskPtr->devname,0,0,init_offset,row,col);
  rf_GeometryDoReadCapacity(&diskPtr->diskState, &diskPtr->numBlocks, &diskPtr->blockSize);
  diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;

  /* we allow the user to specify that only a fraction of the disks should be used
   * this is just for debug:  it speeds up the parity scan
   */

#else /* SIMULATE */
#ifndef __NetBSD__
  /* get bus, target, lun */
  retcode = rf_extract_ids(p, &busid, &targid, &lun);
  if (retcode)
    return(retcode);

  /* required in kernel, nop at user level */
  retcode = rf_SCSI_OpenUnit(dev);
  if (retcode)
    return(retcode);

  diskPtr->dev = dev;
  if (rf_SCSI_DoTUR(tur_op, (u_char)busid, (u_char)targid, (u_char)lun, dev)) {
    RF_ERRORMSG1("Disk %s failed TUR.  Marked as dead.\n",diskPtr->devname);
    diskPtr->status = rf_ds_failed;
  } else {
    diskPtr->status = rf_ds_optimal;
    retcode = rf_SCSI_DoReadCapacity(raidPtr,rdcap_op, busid, targid, lun, dev,
                                     &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname);
    if (retcode)
      return(retcode);

    /* we allow the user to specify that only a fraction of the disks should be used
     * this is just for debug:  it speeds up the parity scan
     */
    diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
  }
#endif /* !__NetBSD__ */

#if defined(__NetBSD__) && defined(_KERNEL)

  proc = raidPtr->proc;  /* XXX Yes, this is not nice.. */

  /* Let's start by claiming the component is fine and well... */
  /* XXX not the case if the disk is toast.. */
  diskPtr->status = rf_ds_optimal;

  raidPtr->raid_cinfo[row][col].ci_vp = NULL;
  /* ci_dev receives va.va_rdev below, i.e. a device number, so clear it with 0 */
  raidPtr->raid_cinfo[row][col].ci_dev = 0;

  error = raidlookup(diskPtr->devname, proc, &vp);
  if (error) {
    printf("raidlookup on device: %s failed!\n",diskPtr->devname);
    if (error == ENXIO) {
      /* XXX the component isn't there... must be dead :-( */
      diskPtr->status = rf_ds_failed;
    } else {
      return(error);
    }
  }

  if (diskPtr->status == rf_ds_optimal) {
    /*
     * From here on vp is ours.  It is not yet recorded in raid_cinfo, so
     * the callers' cleanup cannot close it for us: close it on every
     * error return.
     * NOTE(review): this assumes raidlookup() hands back a vnode opened
     * FREAD|FWRITE, which matches how the callers close ci_vp -- confirm.
     */
    if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
      (void)vn_close(vp, FREAD|FWRITE, proc->p_ucred, proc);
      return(error);
    }

    error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
                      FREAD, proc->p_ucred, proc);
    if (error) {
      (void)vn_close(vp, FREAD|FWRITE, proc->p_ucred, proc);
      return(error);
    }

    diskPtr->blockSize = dpart.disklab->d_secsize;

    /* the first rf_protectedSectors sectors of the partition are not used for data */
    diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;

    raidPtr->raid_cinfo[row][col].ci_vp = vp;
    raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;

#if 0
    diskPtr->dev = dev;
#endif

    diskPtr->dev = va.va_rdev; /* XXX or the above? */

    /* we allow the user to specify that only a fraction of the disks should be used
     * this is just for debug:  it speeds up the parity scan
     */
    diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
  }

#endif /* __NetBSD__ && _KERNEL */
#endif /* SIMULATE */

  return(0);
}
#ifdef SIMULATE
/*
 * Simulator only: reset the disk database file name and the disk type
 * name to their built-in defaults.
 */
void rf_default_disk_names()
{
  /* plain literals with no conversions: a straight copy is all that is needed */
  strcpy(disk_db_file_name, "disk.db");
  strcpy(disk_type_name, "HP2247");
}
/*
 * Simulator only: set the name of the disk database file.
 * Names longer than the buffer are truncated rather than written past
 * the end of disk_db_file_name.
 */
void rf_set_disk_db_name(char *s)
{
  strncpy(disk_db_file_name, s, sizeof(disk_db_file_name) - 1);
  /* strncpy does not terminate the destination when it truncates */
  disk_db_file_name[sizeof(disk_db_file_name) - 1] = '\0';
}
/*
 * Simulator only: set the disk type name.
 * Names longer than the buffer are truncated rather than written past
 * the end of disk_type_name.
 */
void rf_set_disk_type_name(char *s)
{
  strncpy(disk_type_name, s, sizeof(disk_type_name) - 1);
  /* strncpy does not terminate the destination when it truncates */
  disk_type_name[sizeof(disk_type_name) - 1] = '\0';
}
#endif /* SIMULATE */

View File

@ -0,0 +1,160 @@
/* $NetBSD: rf_disks.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_disks.h -- header file for code related to physical disks
*/
/* :
* Log: rf_disks.h,v
* Revision 1.15 1996/08/20 23:05:13 jimz
* add nreads, nwrites to RaidDisk
*
* Revision 1.14 1996/06/17 03:20:15 jimz
* increase devname len to 56
*
* Revision 1.13 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.12 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.11 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.10 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.9 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.8 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.7 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.6 1996/05/02 22:06:57 jimz
* add RF_RaidDisk_t
*
* Revision 1.5 1995/12/01 15:56:53 root
* added copyright info
*
*/
#ifndef _RF__RF_DISKS_H_
#define _RF__RF_DISKS_H_
#include <sys/types.h>
#include "rf_archs.h"
#include "rf_types.h"
#ifdef SIMULATE
#include "rf_geometry.h"
#endif /* SIMULATE */
/*
 * A physical disk can be in one of several states:
 * IF YOU ADD A STATE, CHECK TO SEE IF YOU NEED TO MODIFY RF_DEAD_DISK() BELOW.
 *
 * NOTE(review): the per-state comments previously appeared shifted down by
 * one line (rf_ds_failed was described as "reconstruction ongoing", and
 * rf_ds_dist_spared and rf_ds_spared carried the same text).  They have
 * been realigned with the state names; confirm against the reconstruction
 * code, which is not visible here.
 */
enum RF_DiskStatus_e {
rf_ds_optimal, /* no problems */
rf_ds_failed, /* disk has failed; reconstruction not yet started (presumably -- see note above) */
rf_ds_reconstructing, /* reconstruction ongoing */
rf_ds_dist_spared, /* reconstruction complete to distributed spare space, dead disk not yet replaced */
rf_ds_spared, /* reconstruction complete to spare, dead disk not yet replaced */
rf_ds_spare, /* an available spare disk */
rf_ds_used_spare /* a spare which has been used, and hence is not available */
};
typedef enum RF_DiskStatus_e RF_DiskStatus_t;
/*
 * Descriptor for one physical disk (regular component or spare).
 * The layout is shared between the kernel and the user-land utilities
 * (see the XXX note below), so members must not be added, removed or
 * reordered without rebuilding both sides.
 */
struct RF_RaidDisk_s {
char devname[56]; /* name of device file */
RF_DiskStatus_t status; /* whether it is up or down */
RF_RowCol_t spareRow; /* if in status "spared", this identifies the spare disk */
RF_RowCol_t spareCol; /* if in status "spared", this identifies the spare disk */
RF_SectorCount_t numBlocks; /* number of blocks, obtained via READ CAPACITY */
int blockSize; /* size of one block (sector), in bytes */
/* XXX the following is needed since we seem to need SIMULATE defined
in order to get user-land stuff to compile, but we *don't* want
this in the structure for the user-land utilities, as the
kernel doesn't know about it!! (and it messes up the size of
the structure, so there is a communication problem between
the kernel and the userland utils :-( GO */
#if defined(SIMULATE) && !defined(RF_UTILITY)
RF_DiskState_t diskState; /* the name of the disk as used in the disk module */
#endif /* SIMULATE */
#if RF_KEEP_DISKSTATS > 0
RF_uint64 nreads; /* NOTE(review): presumably a count of reads issued to this disk -- confirm */
RF_uint64 nwrites; /* NOTE(review): presumably a count of writes issued to this disk -- confirm */
#endif /* RF_KEEP_DISKSTATS > 0 */
dev_t dev; /* device number of the disk */
};
/*
 * An RF_DiskOp_t ptr is really a pointer to a UAGT_CCB, but I want
 * to isolate the cam layer from all other layers, so I typecast to/from
 * RF_DiskOp_t * (i.e. void *) at the interfaces.
 */
typedef void RF_DiskOp_t;
/*
 * if a disk is in any of these states, it is inaccessible
 *
 * Non-zero for spared, reconstructing, failed and dist_spared; zero for
 * optimal, spare and used_spare.  The argument may be evaluated up to
 * four times, so it must not have side effects.
 */
#define RF_DEAD_DISK(_dstat_) (((_dstat_) == rf_ds_spared) || \
((_dstat_) == rf_ds_reconstructing) || ((_dstat_) == rf_ds_failed) || \
((_dstat_) == rf_ds_dist_spared))
/*
 * Disk configuration entry points, defined in rf_disks.c.  All three
 * return 0 on success and a non-zero error code on failure.
 */
/* configure every regular (non-spare) disk of the array */
int rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/* configure the spare disks, which are kept at the end of row zero */
int rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/* configure the single disk at (row, col); buf holds its device name */
int rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
RF_DiskOp_t *rdcap_op, RF_DiskOp_t *tur_op, dev_t dev,
RF_RowCol_t row, RF_RowCol_t col);
#ifdef SIMULATE
/* simulator only: default / explicit names of the disk database file and disk type */
void rf_default_disk_names(void);
void rf_set_disk_db_name(char *s);
void rf_set_disk_type_name(char *s);
#endif /* SIMULATE */
#endif /* !_RF__RF_DISKS_H_ */

View File

@ -0,0 +1,102 @@
/* $NetBSD: rf_diskthreads.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* rf_diskthreads.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_diskthreads.h,v
* Revision 1.7 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.6 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.5 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.4 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.3 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/18 19:55:58 jimz
* Initial revision
*
*/
/*
* rf_diskthreads.h -- types and prototypes for disk thread system
*/
#ifndef _RF__RF_DISKTHREADS_H_
#define _RF__RF_DISKTHREADS_H_
#include "rf_types.h"
/*
 * RF_DiskId_s -- per-disk context for the disk thread system.
 * This is the information that a disk thread needs to do its job.
 */
struct RF_DiskId_s {
RF_DiskQueue_t *queue; /* disk queue (presumably the one this thread services -- confirm in rf_diskthreads.c) */
RF_Raid_t *raidPtr; /* array descriptor (presumably the array owning this disk -- confirm) */
RF_RaidDisk_t *disk; /* per-disk descriptor */
int fd; /* file descriptor */
RF_RowCol_t row, col; /* debug only */
#ifdef SIMULATE
int state; /* only present in simulator builds; its values are not defined in this header */
#endif /* SIMULATE */
};
/*
 * Disk thread system entry points.
 *
 * rf_ConfigureDiskThreads takes the shutdown list, the array descriptor and
 * the configuration; it is declared in every build.
 */
int rf_ConfigureDiskThreads(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
#ifdef SIMULATE
/* The following prototypes exist only in simulator builds (SIMULATE defined). */
int rf_SetDiskIdle(RF_Raid_t *raidPtr, RF_RowCol_t r, RF_RowCol_t c);
int rf_ScanDiskQueues(RF_Raid_t *raidPtr);
void rf_simulator_complete_io(RF_DiskId_t *id);
void rf_PrintDiskStat(RF_Raid_t *raidPtr);
#else /* SIMULATE */
/* Declared only when SIMULATE is not defined. */
int rf_ShutdownDiskThreads(RF_Raid_t *raidPtr);
#endif /* SIMULATE */
#endif /* !_RF__RF_DISKTHREADS_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
/* $NetBSD: rf_driver.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* rf_driver.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_driver.h,v
* Revision 1.11 1996/07/11 19:08:00 jimz
* generalize reconstruction mechanism
* allow raid1 reconstructs via copyback (done with array
* quiesced, not online, therefore not disk-directed)
*
* Revision 1.10 1996/06/10 14:18:58 jimz
* move user, throughput stats into per-array structure
*
* Revision 1.9 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.8 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.7 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.6 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.5 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.4 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.3 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/18 19:56:10 jimz
* Initial revision
*
*/
#ifndef _RF__RF_DRIVER_H_
#define _RF__RF_DRIVER_H_
#include "rf_threadstuff.h"
#include "rf_types.h"
/*
 * External declaration of rf_printf_mutex; RF_DECLARE_EXTERN_MUTEX comes
 * from rf_threadstuff.h (the macro is written without a trailing ';' here).
 */
RF_DECLARE_EXTERN_MUTEX(rf_printf_mutex)
/* Global bring-up / tear-down (no arguments) and per-array shutdown/configure. */
int rf_BootRaidframe(void);
int rf_UnbootRaidframe(void);
int rf_Shutdown(RF_Raid_t *raidPtr);
int rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
/*
 * Access descriptor allocation / release.  cbF is a callback taking a
 * struct buf *, cbA an opaque argument (presumably passed to cbF -- confirm
 * in rf_driver.c).
 */
RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr,
void *bp, RF_DagHeader_t **paramDAG, RF_AccessStripeMapHeader_t **paramASM,
RF_RaidAccessFlags_t flags, void (*cbF)(struct buf *), void *cbA,
RF_AccessState_t *states);
void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc);
/*
 * Main access entry point: takes the same address/length/buffer/flags/callback
 * parameters as rf_AllocRaidAccDesc plus async_flag and an optional
 * descriptor out-parameter (paramDesc).
 */
int rf_DoAccess(RF_Raid_t *raidPtr, RF_IoType_t type, int async_flag,
RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, caddr_t bufPtr,
void *bp_in, RF_DagHeader_t **paramDAG,
RF_AccessStripeMapHeader_t **paramASM, RF_RaidAccessFlags_t flags,
RF_RaidAccessDesc_t **paramDesc, void (*cbF)(struct buf *), void *cbA);
/* Disk failure handling; disks are addressed by (row, col). */
int rf_SetReconfiguredMode(RF_Raid_t *raidPtr, RF_RowCol_t row,
RF_RowCol_t col);
int rf_FailDisk(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol,
int initRecon);
#ifdef SIMULATE
/* Simulator builds only. */
void rf_ScheduleContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc);
#endif /* SIMULATE */
/* Quiescence control for an array. */
void rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc);
int rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr);
void rf_ResumeNewRequests(RF_Raid_t *raidPtr);
/* Per-array throughput / user statistics. */
void rf_StartThroughputStats(RF_Raid_t *raidPtr);
void rf_StartUserStats(RF_Raid_t *raidPtr);
void rf_StopUserStats(RF_Raid_t *raidPtr);
void rf_UpdateUserStats(RF_Raid_t *raidPtr, int rt, int numsect);
void rf_PrintUserStats(RF_Raid_t *raidPtr);
#endif /* !_RF__RF_DRIVER_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,74 @@
/* $NetBSD: rf_engine.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II, Mark Holland, Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/**********************************************************
* *
* engine.h -- header file for execution engine functions *
* *
**********************************************************/
/* :
* Log: rf_engine.h,v
* Revision 1.11 1996/06/14 14:16:22 jimz
* new decl of ConfigureEngine
*
* Revision 1.10 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.9 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.8 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.7 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.6 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.5 1995/12/01 18:12:17 root
* added copyright info
*
*/
#ifndef _RF__RF_ENGINE_H_
#define _RF__RF_ENGINE_H_
/*
 * Execution engine entry points.
 * NOTE(review): this header uses RF_ShutdownList_t, RF_Raid_t, RF_Config_t,
 * RF_DagNode_t and RF_DagHeader_t without including anything itself, so it
 * presumably relies on the includer pulling in the type headers first -- confirm.
 */
int rf_ConfigureEngine(RF_ShutdownList_t **listp,
RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
int rf_FinishNode(RF_DagNode_t *node, int context); /* return finished node to engine */
int rf_DispatchDAG(RF_DagHeader_t *dag, void (*cbFunc)(void *), void *cbArg); /* execute dag */
#endif /* !_RF__RF_ENGINE_H_ */

View File

@ -0,0 +1,352 @@
/* $NetBSD: rf_etimer.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_etimer.h -- header file for code related to accurate timing
* This code currently assumes that the elapsed time between RF_ETIMER_START
* and RF_ETIMER_STOP is less than the period of the cycle counter. This
* means the events you want to time must be shorter than:
* clock speed max time
* ---------- --------
* 175 MHz 24 sec
* 150 MHz 28 sec
* 125 MHz 34 sec
*
*
* :
* Log: rf_etimer.h,v
* Revision 1.32 1996/08/13 18:11:09 jimz
* want MACH&&!__osf__, not just MACH for mach timing (MACH defined under OSF/1)
*
* Revision 1.31 1996/08/12 20:11:38 jimz
* use read_real_time() on AIX4+
*
* Revision 1.30 1996/08/09 18:48:12 jimz
* for now, use gettimeofday() on MACH
* (should eventually use better clock stuff)
*
* Revision 1.29 1996/08/07 21:09:08 jimz
* add IRIX as a gettimeofday system
*
* Revision 1.28 1996/08/06 22:25:23 jimz
* add LINUX_I386
*
* Revision 1.27 1996/07/30 04:45:53 jimz
* add ultrix stuff
*
* Revision 1.26 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.25 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.24 1996/07/27 18:40:24 jimz
* cleanup sweep
*
* Revision 1.23 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.22 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.21 1996/07/17 21:00:58 jimz
* clean up timer interface, tracing
*
* Revision 1.20 1996/07/17 14:26:28 jimz
* rf_scc -> rf_rpcc
*
* Revision 1.19 1996/06/14 21:24:48 jimz
* move out ConfigureEtimer
*
* Revision 1.18 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.17 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.16 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.15 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.14 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.13 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.12 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.11 1995/12/01 18:10:40 root
* added copyright info
*
* Revision 1.10 1995/09/29 14:27:32 wvcii
* removed printfs from ConfigureEtimer()
*
* Revision 1.9 95/09/19 22:57:31 jimz
* added kernel version of ConfigureEtimer
*
* Revision 1.8 1995/09/14 13:03:04 amiri
* set default CPU speed to 125Mhz to avoid divide by zero problems.
*
* Revision 1.7 1995/09/11 19:04:36 wvcii
* timer autoconfigs using pdl routine to check cpu speed
* value may still be overridden via config debug var timerTicksPerSec
*
*/
#ifndef _RF__RF_TIMER_H_
#define _RF__RF_TIMER_H_
#include "rf_options.h"
/* Treat _KERNEL as KERNEL so the KERNEL tests below apply to both spellings. */
#ifdef _KERNEL
#define KERNEL
#endif
/*
 * NetBSD: cycle-counter based timing.  rf_read_cycle_counter is rpcc in the
 * kernel and rf_rpcc otherwise.
 */
#ifdef __NetBSD__
#ifdef KERNEL
extern unsigned int rpcc(void);
#define rf_read_cycle_counter rpcc
#else /* KERNEL */
/*
 * NOTE(review): this inner "#ifndef __NetBSD__" sits inside "#ifdef __NetBSD__",
 * so the rf_rpcc declaration below is never compiled in this section.
 */
#ifndef __NetBSD__
/* XXX does this function even exist anywhere??? GO */
extern unsigned int rf_rpcc();
#endif
#define rf_read_cycle_counter rf_rpcc
#endif /* KERNEL */
/* Default largest value of the cycle counter (all 32 bits set). */
#define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF
/* One sample of the cycle counter. */
typedef struct RF_EtimerVal_s {
unsigned ccnt; /* cycle count */
} RF_EtimerVal_t;
/* A timer: start sample, end sample, and the elapsed ticks filled in by RF_ETIMER_EVAL. */
struct RF_Etimer_s {
RF_EtimerVal_t st;
RF_EtimerVal_t et;
unsigned long ticks; /* elapsed time in ticks */
};
/* Timer configuration, defined elsewhere. */
extern long rf_timer_max_val;
extern long rf_timer_ticks_per_second;
extern unsigned long rf_timer_ticks_per_usec;
/* Convert a tick count to microseconds using rf_timer_ticks_per_usec. */
#define RF_ETIMER_TICKS2US(_tcks_) ( (_tcks_) / rf_timer_ticks_per_usec )
/* Sample the cycle counter into the start / end slot of timer _t_. */
#define RF_ETIMER_START(_t_) { (_t_).st.ccnt = rf_read_cycle_counter(); }
#define RF_ETIMER_STOP(_t_)  { (_t_).et.ccnt = rf_read_cycle_counter(); }
/*
 * Compute (_t_).ticks from the start and end samples.
 *
 * If the end sample is not below the start sample the counter did not wrap
 * and the difference is used directly; identical samples therefore give
 * zero ticks rather than a full counter period.  Otherwise the counter
 * wrapped once: it ran from st up to rf_timer_max_val, took one more tick
 * to reach 0, and then counted up to et, hence the "+ 1".
 */
#define RF_ETIMER_EVAL(_t_) { \
	if ((_t_).st.ccnt <= (_t_).et.ccnt) \
		(_t_).ticks = (_t_).et.ccnt - (_t_).st.ccnt; \
	else \
		(_t_).ticks = rf_timer_max_val - ((_t_).st.ccnt - (_t_).et.ccnt) + 1; \
}
/* Accessors for the result of RF_ETIMER_EVAL: ticks, microseconds, milliseconds. */
#define RF_ETIMER_VAL_TICKS(_t_) ((_t_).ticks)
#define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks))
#define RF_ETIMER_VAL_MS(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)/1000)
#endif /* __NetBSD__ */
/*
 * Alpha (non-NetBSD): cycle-counter based timing.  rf_read_cycle_counter is
 * rpcc in the kernel and rf_rpcc otherwise.
 */
#if defined(__alpha) && !defined(__NetBSD__)
#ifdef KERNEL
extern unsigned int rpcc();
#define rf_read_cycle_counter rpcc
#else /* KERNEL */
extern unsigned int rf_rpcc();
#define rf_read_cycle_counter rf_rpcc
#endif /* KERNEL */
/* Default largest value of the cycle counter (all 32 bits set). */
#define RF_DEF_TIMER_MAX_VAL 0xFFFFFFFF
/* One sample of the cycle counter. */
typedef struct RF_EtimerVal_s {
unsigned ccnt; /* cycle count */
} RF_EtimerVal_t;
/* A timer: start sample, end sample, and the elapsed ticks filled in by RF_ETIMER_EVAL. */
struct RF_Etimer_s {
RF_EtimerVal_t st;
RF_EtimerVal_t et;
unsigned long ticks; /* elapsed time in ticks */
};
/* Timer configuration, defined elsewhere. */
extern long rf_timer_max_val;
extern long rf_timer_ticks_per_second;
extern unsigned long rf_timer_ticks_per_usec;
/* Convert a tick count to microseconds using rf_timer_ticks_per_usec. */
#define RF_ETIMER_TICKS2US(_tcks_) ( (_tcks_) / rf_timer_ticks_per_usec )
/* Sample the cycle counter into the start / end slot of timer _t_. */
#define RF_ETIMER_START(_t_) { (_t_).st.ccnt = rf_read_cycle_counter(); }
#define RF_ETIMER_STOP(_t_)  { (_t_).et.ccnt = rf_read_cycle_counter(); }
/*
 * Compute (_t_).ticks from the start and end samples.
 *
 * If the end sample is not below the start sample the counter did not wrap
 * and the difference is used directly; identical samples therefore give
 * zero ticks rather than a full counter period.  Otherwise the counter
 * wrapped once: it ran from st up to rf_timer_max_val, took one more tick
 * to reach 0, and then counted up to et, hence the "+ 1".
 */
#define RF_ETIMER_EVAL(_t_) { \
	if ((_t_).st.ccnt <= (_t_).et.ccnt) \
		(_t_).ticks = (_t_).et.ccnt - (_t_).st.ccnt; \
	else \
		(_t_).ticks = rf_timer_max_val - ((_t_).st.ccnt - (_t_).et.ccnt) + 1; \
}
/* Accessors for the result of RF_ETIMER_EVAL: ticks, microseconds, milliseconds. */
#define RF_ETIMER_VAL_TICKS(_t_) ((_t_).ticks)
#define RF_ETIMER_VAL_US(_t_) (RF_ETIMER_TICKS2US((_t_).ticks))
#define RF_ETIMER_VAL_MS(_t_) (RF_ETIMER_TICKS2US((_t_).ticks)/1000)
#endif /* __alpha */
/*
 * AIX (_IBMR2): timing based on a (seconds, nanoseconds) clock reading.
 */
#ifdef _IBMR2
extern void rf_rtclock(unsigned int *secs, unsigned int *nsecs);
/* Unit conversion factors. */
#define RF_MSEC_PER_SEC 1000
#define RF_USEC_PER_SEC 1000000
#define RF_NSEC_PER_SEC 1000000000
/* One clock reading. */
typedef struct RF_EtimerVal_s {
unsigned int secs;
unsigned int nsecs;
} RF_EtimerVal_t;
/* A timer: start reading, end reading, and the elapsed time filled in by RF_ETIMER_EVAL. */
struct RF_Etimer_s {
RF_EtimerVal_t start;
RF_EtimerVal_t end;
RF_EtimerVal_t elapsed;
};
#if RF_AIXVERS >= 4
#include <sys/time.h>
/*
 * AIX 4 and later: read the clock with read_real_time() and store tb_high
 * as seconds and tb_low as nanoseconds.
 * NOTE(review): this treats tb_high/tb_low as already being in
 * seconds/nanoseconds; check the read_real_time() documentation for whether
 * a time_base_to_time() conversion is required on some processors.
 */
#define RF_ETIMER_START(_t_) { \
timebasestruct_t tb; \
tb.flag = 1; \
read_real_time(&tb, TIMEBASE_SZ); \
(_t_).start.secs = tb.tb_high; \
(_t_).start.nsecs = tb.tb_low; \
}
#define RF_ETIMER_STOP(_t_) { \
timebasestruct_t tb; \
tb.flag = 1; \
read_real_time(&tb, TIMEBASE_SZ); \
(_t_).end.secs = tb.tb_high; \
(_t_).end.nsecs = tb.tb_low; \
}
#else /* RF_AIXVERS >= 4 */
/* Older AIX: rf_rtclock() fills in the seconds and nanoseconds directly. */
#define RF_ETIMER_START(_t_) { \
rf_rtclock(&((_t_).start.secs), &((_t_).start.nsecs)); \
}
#define RF_ETIMER_STOP(_t_) { \
rf_rtclock(&((_t_).end.secs), &((_t_).end.nsecs)); \
}
#endif /* RF_AIXVERS >= 4 */
/*
 * Compute (_t_).elapsed = (_t_).end - (_t_).start as (seconds, nanoseconds).
 *
 * When the end nanoseconds are smaller than the start nanoseconds, one
 * second is borrowed: RF_NSEC_PER_SEC is added to the nanosecond field and
 * the seconds difference is reduced by one.
 */
#define RF_ETIMER_EVAL(_t_) { \
	if ((_t_).end.nsecs >= (_t_).start.nsecs) { \
		(_t_).elapsed.nsecs = (_t_).end.nsecs - (_t_).start.nsecs; \
		(_t_).elapsed.secs = (_t_).end.secs - (_t_).start.secs; \
	} \
	else { \
		(_t_).elapsed.nsecs = RF_NSEC_PER_SEC + (_t_).end.nsecs; \
		(_t_).elapsed.nsecs -= (_t_).start.nsecs; \
		(_t_).elapsed.secs = (_t_).end.secs - (_t_).start.secs - 1; \
	} \
}
/* Elapsed time of an evaluated timer in microseconds / milliseconds. */
#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.secs*RF_USEC_PER_SEC)+((_t_).elapsed.nsecs/1000))
#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.secs*RF_MSEC_PER_SEC)+((_t_).elapsed.nsecs/1000000))
#endif /* _IBMR2 */
/*
* XXX investigate better timing for these
*/
/*
 * Platforms timed with gettimeofday(): start/end/elapsed are struct timeval.
 * NOTE(review): if one of these platform macros is defined together with
 * __NetBSD__, this section and the __NetBSD__ section above would both
 * define struct RF_Etimer_s and the RF_ETIMER_* macros -- confirm the two
 * are never enabled together.
 */
#if defined(hpux) || defined(sun) || defined(NETBSD_I386) || defined(ultrix) || defined(LINUX_I386) || defined(IRIX) || (defined(MACH) && !defined(__osf__))
#include <sys/time.h>
#define RF_USEC_PER_SEC 1000000
/* A timer: start reading, end reading, and the elapsed time filled in by RF_ETIMER_EVAL. */
struct RF_Etimer_s {
struct timeval start;
struct timeval end;
struct timeval elapsed;
};
#ifndef __NetBSD__
/* Record the current time of day as the start / end of the interval. */
#define RF_ETIMER_START(_t_) { \
gettimeofday(&((_t_).start), NULL); \
}
#define RF_ETIMER_STOP(_t_) { \
gettimeofday(&((_t_).end), NULL); \
}
#else
/* On NetBSD both macros expand to an empty block, so start and end are never written. */
#define RF_ETIMER_START(_t_) { \
}
/* XXX these just drop off the end of the world... */
#define RF_ETIMER_STOP(_t_) { \
}
#endif
/*
 * Compute (_t_).elapsed = (_t_).end - (_t_).start as a struct timeval.
 *
 * When the end microseconds are smaller than the start microseconds, one
 * second is borrowed: RF_USEC_PER_SEC is added to the microsecond field and
 * the seconds difference is reduced by one.
 */
#define RF_ETIMER_EVAL(_t_) { \
	if ((_t_).end.tv_usec >= (_t_).start.tv_usec) { \
		(_t_).elapsed.tv_usec = (_t_).end.tv_usec - (_t_).start.tv_usec; \
		(_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_sec; \
	} \
	else { \
		(_t_).elapsed.tv_usec = RF_USEC_PER_SEC + (_t_).end.tv_usec; \
		(_t_).elapsed.tv_usec -= (_t_).start.tv_usec; \
		(_t_).elapsed.tv_sec = (_t_).end.tv_sec - (_t_).start.tv_sec - 1; \
	} \
}
/* RF_ETIMER_VAL_MS needs this factor, which this section did not define. */
#ifndef RF_MSEC_PER_SEC
#define RF_MSEC_PER_SEC 1000
#endif
/* Elapsed time of an evaluated timer in microseconds / milliseconds. */
#define RF_ETIMER_VAL_US(_t_) (((_t_).elapsed.tv_sec*RF_USEC_PER_SEC)+(_t_).elapsed.tv_usec)
#define RF_ETIMER_VAL_MS(_t_) (((_t_).elapsed.tv_sec*RF_MSEC_PER_SEC)+((_t_).elapsed.tv_usec/1000))
#endif /* hpux || sun || NETBSD_I386 || ultrix || LINUX_I386 || IRIX || (MACH && !__osf__) */
#endif /* !_RF__RF_TIMER_H_ */

View File

@ -0,0 +1,555 @@
/* $NetBSD: rf_evenodd.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chang-Ming Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*****************************************************************************************
*
* rf_evenodd.c -- implements EVENODD array architecture
*
****************************************************************************************/
#include "rf_archs.h"
#if RF_INCLUDE_EVENODD > 0
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_evenodd.h"
#include "rf_configure.h"
#include "rf_parityscan.h"
#include "rf_utils.h"
#include "rf_map.h"
#include "rf_pq.h"
#include "rf_mcpair.h"
#include "rf_sys.h"
#include "rf_evenodd.h"
#include "rf_evenodd_dagfuncs.h"
#include "rf_evenodd_dags.h"
#include "rf_engine.h"
/*
 * Layout-specific information for an EVENODD array; stored in
 * Layout.layoutSpecificInfo by rf_ConfigureEvenOdd.
 */
typedef struct RF_EvenOddConfigInfo_s {
RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by IdentifyStripe */
} RF_EvenOddConfigInfo_t;
/*
 * rf_ConfigureEvenOdd -- configure the EVENODD layout for an array.
 *
 * Allocates the layout-specific info (registered on raidPtr->cleanupList),
 * builds the stripeIdentifier table and fills in the remaining parameters
 * of raidPtr->Layout.  listp and cfgPtr are not used by this routine.
 *
 * Returns 0 on success, ENOMEM if an allocation fails, or EINVAL if the
 * compiled-in RF_EO_MATRIX_DIM does not suit the number of columns.
 */
int rf_ConfigureEvenOdd(
	RF_ShutdownList_t **listp,
	RF_Raid_t *raidPtr,
	RF_Config_t *cfgPtr)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_EvenOddConfigInfo_t *info;
	RF_RowCol_t i, j, startdisk;

	RF_MallocAndAdd(info, sizeof(RF_EvenOddConfigInfo_t), (RF_EvenOddConfigInfo_t *), raidPtr->cleanupList);
	if (info == NULL)
		return(ENOMEM); /* do not dereference a failed allocation */
	layoutPtr->layoutSpecificInfo = (void *) info;

	RF_ASSERT(raidPtr->numRow == 1);

	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
	if (info->stripeIdentifier == NULL)
		return(ENOMEM);

	/*
	 * Row i lists the columns of a stripe in order, beginning at startdisk;
	 * the starting column moves back by two (modulo numCol) for each row.
	 */
	startdisk = 0;
	for (i=0; i<raidPtr->numCol; i++) {
		for (j=0; j<raidPtr->numCol; j++) {
			info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
		}
		if ((startdisk -= 2) < 0) startdisk += raidPtr->numCol;
	}

	/* fill in the remaining layout parameters */
	layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
	layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
	layoutPtr->numDataCol = raidPtr->numCol-2; /* ORIG: layoutPtr->numDataCol = raidPtr->numCol-1; */

	/* the compiled-in matrix dimension must match the array width */
#if RF_EO_MATRIX_DIM > 17
	if (raidPtr->numCol <= 17){
		printf("Number of stripe units in a parity stripe is smaller than 17. Please\n");
		printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
		printf("be 17 to increase performance. \n");
		return(EINVAL);
	}
#elif RF_EO_MATRIX_DIM == 17
	if (raidPtr->numCol > 17) {
		printf("Number of stripe units in a parity stripe is bigger than 17. Please\n");
		printf("define the macro RF_EO_MATRIX_DIM in file rf_evenodd_dagfuncs.h to \n");
		printf("be 257 for encoding and decoding functions to work. \n");
		return(EINVAL);
	}
#endif

	layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
	layoutPtr->numParityCol = 2; /* two redundancy columns: P and E */
	layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
	raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
	raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;

	return(0);
}
/*
 * Default number of floating reconstruction buffers for an EVENODD array.
 * The value is fixed; raidPtr is not consulted.
 */
int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr)
{
	const int defaultFloatingReconBuffers = 20;

	return defaultFloatingReconBuffers;
}
/*
 * Default head separation limit for an EVENODD array.
 * The value is fixed; raidPtr is not consulted.
 */
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr)
{
	const RF_HeadSepLimit_t defaultHeadSepLimit = 10;

	return defaultHeadSepLimit;
}
/*
 * Return the ordered list of disk columns making up the stripe that
 * contains addr.
 *
 * *diskids is set to the row of the stripeIdentifier table selected by
 * (stripe ID mod numCol); *outRow is always set to 0.
 */
void rf_IdentifyStripeEvenOdd(
	RF_Raid_t *raidPtr,
	RF_RaidAddr_t addr,
	RF_RowCol_t **diskids,
	RF_RowCol_t *outRow)
{
	RF_StripeNum_t sid;
	RF_EvenOddConfigInfo_t *eoInfo;

	sid = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
	eoInfo = (RF_EvenOddConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;

	*outRow = 0;
	*diskids = eoInfo->stripeIdentifier[ sid % raidPtr->numCol ];
}
/* The layout of stripe units on the disks is:
 *
 *    c0  c1  c2  c3  c4
 *
 *     0   1   2   E   P
 *     5   E   P   3   4
 *     P   6   7   8   E
 *    10  11   E   P   9
 *     E   P  12  13  14
 *    ....
 *
 * We use MapSectorRAID5 to map the data units, because that routine can be
 * shown to produce exactly the data stripe unit layout above even though
 * there are now two redundant units per stripe.  For E and P we use
 * rf_MapEEvenOdd and rf_MapParityEvenOdd, which differ from the RAID-5
 * method.
 */
/*
 * Map a RAID sector address to the location of the P (parity) unit of its
 * stripe.
 *
 * *row is always 0.  *col is (last data stripe-unit ID of the stripe + 2)
 * modulo numCol.  *diskSector is the stripe index times the stripe unit
 * size plus the sector's offset within its stripe unit.  remap is not used.
 */
void rf_MapParityEvenOdd(
	RF_Raid_t *raidPtr,
	RF_RaidAddr_t raidSector,
	RF_RowCol_t *row,
	RF_RowCol_t *col,
	RF_SectorNum_t *diskSector,
	int remap)
{
	RF_RaidLayout_t *lp = &raidPtr->Layout;
	RF_StripeNum_t suid = raidSector / lp->sectorsPerStripeUnit;
	/* ID of the last data stripe unit in the stripe containing suid */
	RF_StripeNum_t lastSuidInStripe = (suid/lp->numDataCol + 1)*lp->numDataCol - 1;

	*row = 0;
	*col = (lastSuidInStripe + 2) % raidPtr->numCol;
	*diskSector = (suid / (lp->numDataCol)) * lp->sectorsPerStripeUnit +
	    (raidSector % lp->sectorsPerStripeUnit);
}
/*
 * Map a RAID sector address to the location of the E unit of its stripe.
 *
 * *row is always 0.  *col is (last data stripe-unit ID of the stripe + 1)
 * modulo numCol.  *diskSector is the stripe index times the stripe unit
 * size plus the sector's offset within its stripe unit.  remap is not used.
 */
void rf_MapEEvenOdd(
	RF_Raid_t *raidPtr,
	RF_RaidAddr_t raidSector,
	RF_RowCol_t *row,
	RF_RowCol_t *col,
	RF_SectorNum_t *diskSector,
	int remap)
{
	RF_RaidLayout_t *lp = &raidPtr->Layout;
	RF_StripeNum_t suid = raidSector / lp->sectorsPerStripeUnit;
	/* ID of the last data stripe unit in the stripe containing suid */
	RF_StripeNum_t lastSuidInStripe = (suid/lp->numDataCol + 1)*lp->numDataCol - 1;

	*row = 0;
	*col = (lastSuidInStripe + 1) % raidPtr->numCol;
	*diskSector = (suid / (lp->numDataCol)) * lp->sectorsPerStripeUnit +
	    (raidSector % lp->sectorsPerStripeUnit);
}
/*
 * rf_EODagSelect -- choose the DAG creation function for one access.
 *
 * The choice depends on the I/O type and on the failure counts recorded in
 * asmap (failed data units, failed parity, failed Q/E).  The selected
 * function is stored in *createFunc; NULL is stored when more than two
 * units have failed or when the combination has no direct support.
 *
 * The rf_EO_xyz_* names selected below appear to encode the failure
 * pattern as x = failed data units, y = failed P, z = failed E/Q
 * (e.g. 110 is chosen when failedPDAs[1] is the parity PDA).
 */
void rf_EODagSelect(
RF_Raid_t *raidPtr,
RF_IoType_t type,
RF_AccessStripeMap_t *asmap,
RF_VoidFuncPtr *createFunc)
{
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
unsigned ndfail = asmap->numDataFailed;
unsigned npfail = asmap->numParityFailed +asmap->numQFailed;
unsigned ntfail = npfail + ndfail;
RF_ASSERT(RF_IO_IS_R_OR_W(type));
if (ntfail > 2)
{
RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
/* *infoFunc = */ *createFunc = NULL;
return;
}
/* ok, we can do this I/O */
if (type == RF_IO_TYPE_READ)
{
/* reads are selected on the number of failed data units only */
switch (ndfail)
{
case 0:
/* fault free read */
*createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG; /* same as raid 5 */
break;
case 1:
/* lost a single data unit */
/* two cases:
(1) parity is not lost.
do a normal raid 5 reconstruct read.
(2) parity is lost.
do a reconstruct read using "e".
*/
if (ntfail == 2) /* also lost redundancy */
{
if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
*createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateReadDAG;
else
*createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateReadDAG;
}
else
{
/* P and E are ok. But is there a failure
in some unaccessed data unit?
*/
if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
*createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateReadDAG;
else
*createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateReadDAG;
}
break;
case 2:
/* two failed data units in the access: no DAG is selected */
/* *createFunc = rf_EO_200_CreateReadDAG; */
*createFunc = NULL;
break;
}
return;
}
/* a write */
/* writes are selected on the total number of failed units */
switch (ntfail)
{
case 0: /* fault free */
/*
 * Small write when large writes are suppressed, when at most half of the
 * data columns are accessed (and there is more than one data column),
 * when the parity or Q info has more than one element, or when
 * rf_CheckStripeForFailures() reports a failure; otherwise large write.
 */
if (rf_suppressLocksAndLargeWrites ||
(((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
*createFunc = (RF_VoidFuncPtr)rf_EOCreateSmallWriteDAG;
}
else {
*createFunc = (RF_VoidFuncPtr)rf_EOCreateLargeWriteDAG;
}
break;
case 1: /* single disk fault */
if (npfail==1)
{
/* the single failed unit is a redundancy unit (P or Q) */
RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)
{ /* q died, treat like normal mode raid5 write.*/
if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
|| (asmap->parityInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
*createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateSmallWriteDAG;
else
*createFunc = (RF_VoidFuncPtr)rf_EO_001_CreateLargeWriteDAG;
}
else
{ /* parity died, small write only updating Q */
if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
|| (asmap->qInfo->next!=NULL) || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
*createFunc = (RF_VoidFuncPtr)rf_EO_010_CreateSmallWriteDAG;
else
*createFunc = (RF_VoidFuncPtr)rf_EO_010_CreateLargeWriteDAG;
}
}
else
{ /* data missing.
Do a P reconstruct write if only a single data unit
is lost in the stripe, otherwise a reconstruct
write which employing both P and E units. */
if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
{
if (asmap->numStripeUnitsAccessed == 1)
*createFunc = (RF_VoidFuncPtr)rf_EO_200_CreateWriteDAG;
else
*createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */
}
else
{
/* unsupported only when more than one unit is accessed and the failed unit is not written in full */
if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
*createFunc = NULL; /* No direct support for this case now, like that in Raid-5 */
else *createFunc = (RF_VoidFuncPtr)rf_EO_100_CreateWriteDAG;
}
}
break;
case 2: /* two disk faults */
switch (npfail)
{
case 2: /* both p and q dead */
*createFunc = (RF_VoidFuncPtr)rf_EO_011_CreateWriteDAG;
break;
case 1: /* either p or q and dead data */
/* failedPDAs[0] is asserted to be the data unit, failedPDAs[1] the redundancy unit */
RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
{
if(asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
*createFunc = NULL; /* In both PQ and EvenOdd, no direct support for this case now, like that in Raid-5 */
else
*createFunc = (RF_VoidFuncPtr)rf_EO_101_CreateWriteDAG;
}
else
{
if (asmap->numStripeUnitsAccessed != 1 && asmap->failedPDAs[0]->numSector != layoutPtr->sectorsPerStripeUnit)
*createFunc = NULL; /* No direct support for this case, like that in Raid-5 */
else
*createFunc = (RF_VoidFuncPtr)rf_EO_110_CreateWriteDAG;
}
break;
case 0: /* double data loss */
/* if(asmap->failedPDAs[0]->numSector + asmap->failedPDAs[1]->numSector == 2 * layoutPtr->sectorsPerStripeUnit )
*createFunc = rf_EOCreateLargeWriteDAG;
else */
*createFunc = NULL; /* currently, in Evenodd, No support for simultaneous access of both failed SUs */
break;
}
break;
default: /* more than 2 disk faults */
/* not expected to be reached: ntfail > 2 already returned at the top of the function */
*createFunc = NULL;
RF_PANIC();
}
return;
}
/*
 * rf_VerifyParityEvenOdd -- verify, and optionally rewrite, the two redundant
 * units (parity and the second redundant unit, 'E') of one EVENODD stripe.
 *
 * Every data unit, the parity unit and the 'E' unit of the stripe containing
 * raidAddr are read with a simple read DAG.  Parity and 'E' are then
 * recomputed from the data that was read and compared byte by byte with the
 * on-disk copies.
 *
 *   raidPtr    - the array being checked
 *   raidAddr   - a RAID address inside the stripe to check
 *   parityPDA  - supplies the sector count (numSector) and is the PDA every
 *                unit of the stripe is range-restricted against
 *   correct_it - nonzero: write the recomputed value over any redundant unit
 *                that does not match; zero: only report the mismatch
 *   flags      - access flags handed to the DAGs created here
 *
 * Returns:
 *   RF_PARITY_OKAY              both redundant units match, or a unit could
 *                               not be read because rf_TryToRedirectPDA()
 *                               reported a dead disk (nothing can be verified)
 *   RF_PARITY_COULD_NOT_VERIFY  the read DAG failed
 *   RF_PARITY_BAD               a mismatch was found and correct_it is zero
 *   RF_PARITY_CORRECTED         every mismatching unit was rewritten
 *   RF_PARITY_COULD_NOT_CORRECT a rewrite of a mismatching unit failed
 */
int rf_VerifyParityEvenOdd(raidPtr, raidAddr, parityPDA, correct_it, flags)
    RF_Raid_t *raidPtr;
    RF_RaidAddr_t raidAddr;
    RF_PhysDiskAddr_t *parityPDA;
    int correct_it;
    RF_RaidAccessFlags_t flags;
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    RF_SectorCount_t numsector = parityPDA->numSector;
    int numbytes = rf_RaidAddressToByte(raidPtr, numsector);
    int bytesPerStripe = numbytes * layoutPtr->numDataCol;
    RF_DagHeader_t *rd_dag_h, *wr_dag_h;    /* read, write dag */
    RF_DagNode_t *blockNode, *wrBlock;
    RF_AccessStripeMapHeader_t *asm_h;
    RF_AccessStripeMap_t *asmap;
    RF_AllocListElem_t *alloclist;
    RF_PhysDiskAddr_t *pda;
    char *pbuf, *buf, *end_p, *p;
    char *redundantbuf2;
    int redundantTwoErr = 0, redundantOneErr = 0;
    int parity_cant_correct = RF_FALSE, red2_cant_correct = RF_FALSE, parity_corrected = RF_FALSE, red2_corrected = RF_FALSE;
    int i, retcode;
    RF_ReconUnitNum_t which_ru;
    RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    RF_AccTraceEntry_t tracerec;
    RF_MCPair_t *mcpair;

    retcode = RF_PARITY_OKAY;

    mcpair = rf_AllocMCPair();
    rf_MakeAllocList(alloclist);
    /* buf holds every unit that is read: the data units, then parity, then 'E' */
    RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
    RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist);    /* use calloc to make sure buffer is zeroed */
    end_p = buf + bytesPerStripe;
    RF_CallocAndAdd(redundantbuf2, 1, numbytes, (char *), alloclist);    /* use calloc to make sure buffer is zeroed */

    rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
        "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
    blockNode = rd_dag_h->succedents[0];

    /* map the stripe and fill in the PDAs in the dag */
    asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
    asmap = asm_h->stripeMap;

    for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) {
        RF_ASSERT(pda);
        rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
        RF_ASSERT(pda->numSector != 0);
        if (rf_TryToRedirectPDA(raidPtr, pda, 0))
            goto out;    /* no way to verify parity if disk is dead.  return w/ good status */
        blockNode->succedents[i]->params[0].p = pda;
        blockNode->succedents[i]->params[2].v = psID;
        blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    }

    RF_ASSERT(!asmap->parityInfo->next);
    rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
    RF_ASSERT(asmap->parityInfo->numSector != 0);
    if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
        goto out;
    blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;

    RF_ASSERT(!asmap->qInfo->next);
    rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->qInfo, 0, 1);
    RF_ASSERT(asmap->qInfo->numSector != 0);
    if (rf_TryToRedirectPDA(raidPtr, asmap->qInfo, 1))
        goto out;
    /*
     * if disk is dead, b/c no reconstruction is implemented right now,
     * the function "rf_TryToRedirectPDA" always returns one, which causes
     * a jump to out and a return w/ good status
     */
    blockNode->succedents[layoutPtr->numDataCol + 1]->params[0].p = asmap->qInfo;

    /* fire off the DAG */
    bzero((char *) &tracerec, sizeof(tracerec));
    rd_dag_h->tracerec = &tracerec;

    if (rf_verifyParityDebug) {
        printf("Parity verify read dag:\n");
        rf_PrintDAGList(rd_dag_h);
    }

    RF_LOCK_MUTEX(mcpair->mutex);
    mcpair->flag = 0;
    rf_DispatchDAG(rd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
        (void *) mcpair);
    while (!mcpair->flag)
        RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    RF_UNLOCK_MUTEX(mcpair->mutex);
    if (rd_dag_h->status != rf_enable) {
        RF_ERRORMSG("Unable to verify parity: can't read the stripe\n");
        retcode = RF_PARITY_COULD_NOT_VERIFY;
        goto out;
    }

    /* recompute 'E' into redundantbuf2 and parity into pbuf from the data units */
    for (p = buf, i = 0; p < end_p; p += numbytes, i++) {
        rf_e_encToBuf(raidPtr, i, p, RF_EO_MATRIX_DIM - 2, redundantbuf2, numsector);
        /*
         * the corresponding columns in the EvenOdd encoding matrix for these p pointers, which point
         * to the data buffers of a full stripe, run sequentially from 0 to layoutPtr->numDataCol-1
         */
        rf_bxor(p, pbuf, numbytes, NULL);
    }
    RF_ASSERT(i == layoutPtr->numDataCol);

    /*
     * Compare the recomputed parity with the parity that was read.  The error
     * flag is raised only when a byte actually differs.
     */
    for (i = 0; i < numbytes; i++) {
        if (pbuf[i] != buf[bytesPerStripe + i]) {
            if (!correct_it) {
                RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
                    i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]);
            }
            redundantOneErr = 1;
            break;
        }
    }

    /* compare the recomputed 'E' with the 'E' that was read */
    for (i = 0; i < numbytes; i++) {
        if (redundantbuf2[i] != buf[bytesPerStripe + numbytes + i]) {
            if (!correct_it) {
                RF_ERRORMSG3("Parity verify error: byte %d of second redundant information is 0x%x should be 0x%x\n",
                    i, (u_char) buf[bytesPerStripe + numbytes + i], (u_char) redundantbuf2[i]);
            }
            redundantTwoErr = 1;
            break;
        }
    }
    if (redundantOneErr || redundantTwoErr)
        retcode = RF_PARITY_BAD;

    /* correct the first redundant disk, ie parity if it is error */
    if (redundantOneErr && correct_it) {
        wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
            "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
        wrBlock = wr_dag_h->succedents[0];
        wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
        wrBlock->succedents[0]->params[2].v = psID;
        wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
        bzero((char *) &tracerec, sizeof(tracerec));
        wr_dag_h->tracerec = &tracerec;
        if (rf_verifyParityDebug) {
            printf("Parity verify write dag:\n");
            rf_PrintDAGList(wr_dag_h);
        }
        RF_LOCK_MUTEX(mcpair->mutex);
        mcpair->flag = 0;
        rf_DispatchDAG(wr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
            (void *) mcpair);
        while (!mcpair->flag)
            RF_WAIT_COND(mcpair->cond, mcpair->mutex);
        RF_UNLOCK_MUTEX(mcpair->mutex);
        if (wr_dag_h->status != rf_enable) {
            RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n");
            parity_cant_correct = RF_TRUE;
        } else {
            parity_corrected = RF_TRUE;
        }
        rf_FreeDAG(wr_dag_h);
    }

    /* correct the second redundant disk, ie 'E', if it is in error */
    if (redundantTwoErr && correct_it) {
        wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, redundantbuf2, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
            "Wnred2", alloclist, flags, RF_IO_NORMAL_PRIORITY);
        wrBlock = wr_dag_h->succedents[0];
        wrBlock->succedents[0]->params[0].p = asmap->qInfo;
        wrBlock->succedents[0]->params[2].v = psID;
        wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
        bzero((char *) &tracerec, sizeof(tracerec));
        wr_dag_h->tracerec = &tracerec;
        if (rf_verifyParityDebug) {
            printf("Dag of write new second redundant information in parity verify :\n");
            rf_PrintDAGList(wr_dag_h);
        }
        RF_LOCK_MUTEX(mcpair->mutex);
        mcpair->flag = 0;
        rf_DispatchDAG(wr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
            (void *) mcpair);
        while (!mcpair->flag)
            RF_WAIT_COND(mcpair->cond, mcpair->mutex);
        RF_UNLOCK_MUTEX(mcpair->mutex);
        if (wr_dag_h->status != rf_enable) {
            RF_ERRORMSG("Unable to correct second redundant information in VerifyParity: can't write the stripe\n");
            red2_cant_correct = RF_TRUE;
        } else {
            red2_corrected = RF_TRUE;
        }
        rf_FreeDAG(wr_dag_h);
    }

    if ((redundantOneErr && parity_cant_correct) ||
        (redundantTwoErr && red2_cant_correct))
        retcode = RF_PARITY_COULD_NOT_CORRECT;
    /*
     * The stripe counts as corrected when every unit found to be wrong was
     * rewritten successfully; a unit that was already right needs no rewrite.
     * This is a comparison: retcode must not be overwritten here.
     */
    if ((retcode == RF_PARITY_BAD) && correct_it &&
        (!redundantOneErr || parity_corrected) &&
        (!redundantTwoErr || red2_corrected))
        retcode = RF_PARITY_CORRECTED;

out:
    rf_FreeAccessStripeMap(asm_h);
    rf_FreeAllocList(alloclist);
    rf_FreeDAG(rd_dag_h);
    rf_FreeMCPair(mcpair);
    return (retcode);
}
#endif /* RF_INCLUDE_EVENODD > 0 */

View File

@ -0,0 +1,48 @@
/* $NetBSD: rf_evenodd.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995, 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chang-Ming Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifndef _RF__RF_EVENODD_H_
#define _RF__RF_EVENODD_H_
/* extern declarations of the failure mode functions. */
/* Layout configuration entry point; defined in rf_evenodd.c. */
int rf_ConfigureEvenOdd(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
int rf_GetDefaultNumFloatingReconBuffersEvenOdd(RF_Raid_t *raidPtr);
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitEvenOdd(RF_Raid_t *raidPtr);
void rf_IdentifyStripeEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outrow);
/* NOTE(review): presumably these two map a RAID sector to the row/col/sector of the parity and 'E' units -- confirm against rf_evenodd.c. */
void rf_MapParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
void rf_MapEEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* Stores the DAG-creation routine for the access in *createFunc, or NULL when the case is not supported. */
void rf_EODagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
/* Checks parity and 'E' of one stripe; rewrites them when correct_it is nonzero. Returns an RF_PARITY_* code. */
int rf_VerifyParityEvenOdd(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
#endif /* !_RF__RF_EVENODD_H_ */

View File

@ -0,0 +1,886 @@
/* $NetBSD: rf_evenodd_dagfuncs.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: ChangMing Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Code for RAID-EVENODD architecture.
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_configure.h"
#include "rf_parityscan.h"
#include "rf_sys.h"
#include "rf_evenodd.h"
#include "rf_evenodd_dagfuncs.h"
/*
 * Redundancy-function tables for small writes.  Each initializer lists a
 * function followed by its label, twice: the "regular" variant first, then
 * the "simple" variant.
 */
RF_RedFuncs_t rf_EOSmallWritePFuncs = { rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P" };
/* The second label belongs to rf_SimpleONEFunc, so it reads "Simple", as in the P table. */
RF_RedFuncs_t rf_EOSmallWriteEFuncs = { rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Simple Old-New E" };
/* Redundancy-function tables for degraded reads. */
RF_RedFuncs_t rf_eoPRecoveryFuncs = { rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
RF_RedFuncs_t rf_eoERecoveryFuncs = { rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func" };
/**********************************************************************************************
 * Encoding node function used in EO_000_CreateLargeWriteDAG.
 *
 * 'E' is encoded into node->results[1] first; rf_RegularXorFunc() then runs on
 * the same node and is the call that performs the wakeup.  Always returns 0.
 **********************************************************************************************/
int rf_RegularPEFunc(node)
    RF_DagNode_t *node;
{
    /* encode 'E' before the xor function runs, since that one does the wakeup */
    rf_RegularESubroutine(node, node->results[1]);
    rf_RegularXorFunc(node);

    return 0;
}
/************************************************************************************************
* For EO_001_CreateSmallWriteDAG, there are (i)RegularONEFunc() and (ii)SimpleONEFunc() to
* be used. The previous case is when write access at least sectors of full stripe unit.
* The later function is used when the write access two stripe units but with total sectors
* less than sectors per SU. In this case, the access of parity and 'E' are shown as disconnected
* areas in their stripe unit and parity write and 'E' write are both devided into two distinct
* writes( totally four). This simple old-new write and regular old-new write happen as in RAID-5
************************************************************************************************/
/* Algorithm:
1. Store the difference of old data and new data in the Rod buffer.
2. then encode this buffer into the buffer which already have old 'E' information inside it,
the result can be shown to be the new 'E' information.
3. xor the Wnd buffer into the difference buffer to recover the original old data.
Here we have another alternative: to allocate a temporary buffer for storing the difference of
old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
take the same speed as the previous, and need more memory.
*/
int rf_RegularONEFunc(node)
RF_DagNode_t *node;
{
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
int EpdaIndex = (node->numParams-1)/2 - 1; /* the parameter of node where you can find e-pda */
int i, k, retcode = 0;
int suoffset, length;
RF_RowCol_t scol;
char *srcbuf, *destbuf;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_Etimer_t timer;
RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); /* generally zero */
RF_ASSERT( EPDA->type == RF_PDA_TYPE_Q );
RF_ASSERT(ESUOffset == 0);
RF_ETIMER_START(timer);
/* Xor the Wnd buffer into Rod buffer, the difference of old data and new data is stored in Rod buffer */
for( k=0; k< EpdaIndex; k += 2) {
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
}
/* Start to encoding the buffer storing the difference of old data and new data into 'E' buffer */
for (i=0; i<EpdaIndex; i+=2) if (node->params[i+1].p != node->results[0]) { /* results[0] is buf ptr of E */
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
srcbuf = (char *) node->params[i+1].p;
scol = rf_EUCol(layoutPtr, pda->raidAddress );
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset);
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
}
/* Recover the original old data to be used by parity encoding function in XorNode */
for( k=0; k< EpdaIndex; k += 2) {
length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *)node->params[k].p)->numSector );
retcode = rf_bxor( node->params[k+EpdaIndex+3].p, node->params[k+1].p, length, node->dagHdr->bp);
}
RF_ETIMER_STOP(timer);
RF_ETIMER_EVAL(timer);
tracerec->q_us += RF_ETIMER_VAL_US(timer);
rf_GenericWakeupFunc(node, 0);
#if 1
return(0); /* XXX this was missing.. GO */
#endif
}
/*
 * "Simple" old-new 'E' function for the small-write DAG.
 *
 * When the DAG is enabled, params[5] (a write-data buffer) is xor-ed into
 * params[1] (a read-data buffer), the result is encoded into params[3] (the
 * 'E' buffer), and the xor is repeated to restore params[1].  The node is
 * woken explicitly because it performs no I/O.  Returns whatever
 * rf_GenericWakeupFunc() returns.
 */
int rf_SimpleONEFunc(node)
    RF_DagNode_t *node;
{
    RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
    RF_AccTraceEntry_t *trace;
    RF_RowCol_t dataCol;
    RF_Etimer_t stopwatch;
    char *diffBuf, *eBuf;
    int nbytes, status;

    RF_ASSERT( ((RF_PhysDiskAddr_t *)node->params[2].p)->type == RF_PDA_TYPE_Q );

    /* nothing to compute on a disabled DAG; just wake up with success */
    if (node->dagHdr->status != rf_enable)
        return (rf_GenericWakeupFunc(node, 0));

    trace = node->dagHdr->tracerec;
    RF_ETIMER_START(stopwatch);

    /* params[4] is a pda of the writeDataNodes; it fixes the xor length */
    nbytes = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);

    /* xor into the readDataNodes buffer; only this status reaches the wakeup */
    status = rf_bxor(node->params[5].p, node->params[1].p, nbytes, node->dagHdr->bp);

    /* column of the encoding matrix that the written column maps to */
    dataCol = rf_EUCol(layoutPtr, pda->raidAddress);
    diffBuf = node->params[1].p;
    eBuf = node->params[3].p;

    /* encode the difference into 'E', then undo the xor */
    rf_e_encToBuf(raidPtr, dataCol, diffBuf, RF_EO_MATRIX_DIM - 2, eBuf, pda->numSector);
    rf_bxor(node->params[5].p, node->params[1].p, nbytes, node->dagHdr->bp);

    RF_ETIMER_STOP(stopwatch);
    RF_ETIMER_EVAL(stopwatch);
    trace->q_us += RF_ETIMER_VAL_US(stopwatch);

    return (rf_GenericWakeupFunc(node, status));
}
/*
 * Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in the
 * fault-free large write.
 *
 * The node parameters are (pda, buffer) pairs followed by two trailing
 * parameters, the last of which is the RF_Raid_t pointer.  Every buffer is
 * encoded into ebuf at the stripe-unit offset of its pda.  The elapsed time is
 * added to the xor_us field of the trace record.
 */
void rf_RegularESubroutine(node, ebuf)
    RF_DagNode_t *node;
    char *ebuf;
{
    RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
    RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
    RF_AccTraceEntry_t *trace = node->dagHdr->tracerec;
    RF_PhysDiskAddr_t *dataPda;
    RF_RowCol_t dataCol;
    RF_Etimer_t stopwatch;
    char *dataBuf, *eDest;
    int slot, unitOffset;

    RF_ETIMER_START(stopwatch);

    slot = 0;
    while (slot < node->numParams - 2) {
        RF_ASSERT( node->params[slot+1].p != ebuf );
        dataPda = (RF_PhysDiskAddr_t *) node->params[slot].p;
        dataBuf = (char *) node->params[slot + 1].p;
        unitOffset = rf_StripeUnitOffset(layoutPtr, dataPda->startSector);
        dataCol = rf_EUCol(layoutPtr, dataPda->raidAddress);
        eDest = ebuf + rf_RaidAddressToByte(raidPtr, unitOffset);
        rf_e_encToBuf(raidPtr, dataCol, dataBuf, RF_EO_MATRIX_DIM - 2, eDest, dataPda->numSector);
        slot += 2;
    }

    RF_ETIMER_STOP(stopwatch);
    RF_ETIMER_EVAL(stopwatch);
    trace->xor_us += RF_ETIMER_VAL_US(stopwatch);
}
/*******************************************************************************************
 * Used in EO_001_CreateLargeWriteDAG.
 *
 * Encodes 'E' into node->results[0], then wakes the node with a success
 * status.  Always returns 0.
 ******************************************************************************************/
int rf_RegularEFunc(node)
    RF_DagNode_t *node;
{
    rf_RegularESubroutine(node, node->results[0]);
    rf_GenericWakeupFunc(node, 0);

    return 0;
}
/*******************************************************************************************
* This degraded function allow only two case:
* 1. when write access the full failed stripe unit, then the access can be more than
* one tripe units.
* 2. when write access only part of the failed SU, we assume accesses of more than
* one stripe unit is not allowed so that the write can be dealt with like a
* large write.
* The following function is based on these assumptions. So except in the second case,
* it looks the same as a large write encodeing function. But this is not exactly the
* normal way for doing a degraded write, since raidframe have to break cases of access
* other than the above two into smaller accesses. We may have to change
* DegrESubroutin in the future.
*******************************************************************************************/
void rf_DegrESubroutine(node, ebuf)
RF_DagNode_t *node;
char *ebuf;
{
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
RF_PhysDiskAddr_t *pda;
int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
RF_RowCol_t scol;
char *srcbuf, *destbuf;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_Etimer_t timer;
RF_ETIMER_START(timer);
for (i=0; i<node->numParams-2; i+=2) {
RF_ASSERT( node->params[i+1].p != ebuf );
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
scol = rf_EUCol(layoutPtr, pda->raidAddress );
srcbuf = (char *) node->params[i+1].p;
destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset-failedSUOffset);
rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
}
RF_ETIMER_STOP(timer); RF_ETIMER_EVAL(timer); tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
/**************************************************************************************
 * Used by EO_100_CreateWriteDAG, i.e. when one data disk has failed and both
 * redundant disks are alive.  Note: if another disk of the stripe has failed but is
 * not touched by this access, rf_EOWriteDoubleRecoveryFunc() should be used instead.
 *
 * 'E' is encoded into node->results[1]; rf_RecoveryXorFunc() then runs on the same
 * node and is the call that performs the wakeup.  Always returns 0.
 **************************************************************************************/
int rf_Degraded_100_EOFunc(node)
    RF_DagNode_t *node;
{
    /* encode 'E' before the xor function runs, since that one does the wakeup */
    rf_DegrESubroutine(node, node->results[1]);
    rf_RecoveryXorFunc(node);

    return 0;
}
/**************************************************************************************
 * Encode one sector of a data column into a sector of the E disk.  In EVENODD the same
 * routine also serves as the decoding function that recovers data of a dead disk when
 * parity and a single data disk have failed.
 *
 * A sector is treated as RF_EO_MATRIX_DIM - 1 encoding units (EUs).  Every EU of the
 * destination sector is xor-ed with one fixed EU of the source sector and, unless the
 * matching source row is the last (all-zero) row of the matrix, with the source EU of
 * that row as well.
 **************************************************************************************/
void rf_e_EncOneSect(
    RF_RowCol_t srcLogicCol,
    char *srcSecbuf,
    RF_RowCol_t destLogicCol,
    char *destSecbuf,
    int bytesPerSector)
{
#if RF_EO_MATRIX_DIM > 17
    typedef short eu_word_t;    /* word type the EUs are processed in */
#elif RF_EO_MATRIX_DIM == 17
    typedef long eu_word_t;     /* word type the EUs are processed in */
#endif
    int fixedRow;    /* row of the source EU that is xor-ed into every destination EU */
    int rowsInMatrix = (RF_EO_MATRIX_DIM) - 1;
    RF_RowCol_t w,
        dstRow,      /* row of the encoding unit in the destination column */
        srcRow;      /* row of the encoding unit in the source column */
    int bytesPerEU = bytesPerSector / rowsInMatrix;
#if RF_EO_MATRIX_DIM >= 17
    int wordsPerEU = bytesPerEU / sizeof(eu_word_t);
    eu_word_t *dstWords, *fixedWords, *srcWords;
    register eu_word_t acc;
#endif

#if RF_EO_MATRIX_DIM > 17
    RF_ASSERT( sizeof(short) == 2 || sizeof(short) == 1 );
    RF_ASSERT( bytesPerEU % sizeof(short) == 0 );
#elif RF_EO_MATRIX_DIM == 17
    RF_ASSERT( sizeof(long) == 8 || sizeof(long) == 4 );
    RF_ASSERT( bytesPerEU % sizeof(long) == 0);
#endif

    fixedRow = rf_EO_Mod( ( RF_EO_MATRIX_DIM -1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
#if RF_EO_MATRIX_DIM >= 17
    fixedWords = (eu_word_t *) (srcSecbuf + fixedRow * bytesPerEU);
#endif

    for (dstRow = 0; dstRow < rowsInMatrix; dstRow++) {
        srcRow = rf_EO_Mod( (dstRow + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM );
#if RF_EO_MATRIX_DIM >= 17
        dstWords = (eu_word_t *) (destSecbuf + dstRow * bytesPerEU);
        srcWords = (eu_word_t *) (srcSecbuf + srcRow * bytesPerEU);
        if (srcRow != RF_EO_MATRIX_DIM - 1) {
            /* note: fixedRow is never the end row for any src col! */
            for (w = 0; w < wordsPerEU; w++) {
                acc = dstWords[w] ^ fixedWords[w];
                dstWords[w] = (srcWords[w]) ^ acc;
            }
        } else {
            /* srcRow is the end row, RF_EO_MATRIX_DIM - 1, whose elements are all zero */
            for (w = 0; w < wordsPerEU; w++) {
                acc = dstWords[w] ^ fixedWords[w];
                dstWords[w] = acc;
            }
        }
#endif
    }
}
/*
 * Encode numSector consecutive sectors of srcbuf into destbuf by calling
 * rf_e_EncOneSect() once per sector, advancing both buffers by one sector
 * each time.
 */
void rf_e_encToBuf(
    RF_Raid_t *raidPtr,
    RF_RowCol_t srcLogicCol,
    char *srcbuf,
    RF_RowCol_t destLogicCol,
    char *destbuf,
    int numSector)
{
    int sectorBytes = rf_RaidAddressToByte(raidPtr, 1);
    char *from = srcbuf;
    char *to = destbuf;
    int done = 0;

    while (done < numSector) {
        rf_e_EncOneSect(srcLogicCol, from, destLogicCol, to, sectorBytes);
        from += sectorBytes;
        to += sectorBytes;
        done++;
    }
}
/**************************************************************************************
* when parity die and one data die, We use second redundant information, 'E',
* to recover the data in dead disk. This function is used in the recovery node of
* for EO_110_CreateReadDAG
**************************************************************************************/
int rf_RecoveryEFunc(node)
RF_DagNode_t *node;
{
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
RF_RowCol_t scol, /*source logical column*/
fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress ); /* logical column of failed SU */
int i;
RF_PhysDiskAddr_t *pda;
int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
char *srcbuf, *destbuf;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_Etimer_t timer;
bzero( (char *)node->results[0], rf_RaidAddressToByte(raidPtr,failedPDA->numSector));
if (node->dagHdr->status == rf_enable) {
RF_ETIMER_START(timer);
for (i=0; i<node->numParams-2; i+=2) if (node->params[i+1].p != node->results[0]) {
pda = (RF_PhysDiskAddr_t *) node->params[i].p;
if( i == node->numParams - 4 ) scol = RF_EO_MATRIX_DIM - 2; /* the colume of redundant E */
else scol = rf_EUCol(layoutPtr, pda->raidAddress );
srcbuf = (char *) node->params[i+1].p;
suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
}
RF_ETIMER_STOP(timer);
RF_ETIMER_EVAL(timer);
tracerec->xor_us += RF_ETIMER_VAL_US(timer);
}
return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
}
/**************************************************************************************
 * Used when one data disk and the parity disk have failed (EO_110_CreateWriteDAG).
 *
 * Encodes 'E' into node->results[0], then wakes the node with a success status.
 * Always returns 0.
 **************************************************************************************/
int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
{
    rf_DegrESubroutine(node, node->results[0]);
    rf_GenericWakeupFunc(node, 0);

    return 0;
}
/**************************************************************************************
 * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
 *
 * rf_doubleEOdecode -- rebuild one sector of each of two failed data columns from the
 * surviving data, the parity sector and the 'E' sector.
 *
 *   raidPtr - the array; supplies the sector size and the number of data columns
 *   rrdbuf  - buffers of the data columns, indexed by logical column; the entries
 *             of the two failed columns are not read
 *   dest    - dest[0] and dest[1] receive the data of columns fcol[0] and fcol[1];
 *             the first long of each is asserted to be zero on entry
 *   fcol    - logical columns of the two failed units; they must differ
 *   pbuf    - the parity sector
 *   ebuf    - the 'E' sector
 *
 * The sector is processed as RF_EO_MATRIX_DIM - 1 encoding units (EUs).  Starting from
 * the imaginary all-zero row, the loop alternates a diagonal recovery (yielding an EU
 * of the column with the larger index) with a horizontal recovery (yielding the EU of
 * the same row in the column with the smaller index).  temp is deliberately carried
 * from one iteration to the next: the horizontal result is the "failed 1" term of the
 * next diagonal.
 **************************************************************************************/
void rf_doubleEOdecode(
    RF_Raid_t *raidPtr,
    char **rrdbuf,
    char **dest,
    RF_RowCol_t *fcol,
    char *pbuf,
    char *ebuf)
{
    RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
    int i, j, k, f1, f2, row;
    int rrdrow, erow, count = 0;
    int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
    int numRowInEncMatix = (RF_EO_MATRIX_DIM) -1;
    /* the parity column would be (RF_EO_MATRIX_DIM) - 1; it is not needed below */
    int ecol = (RF_EO_MATRIX_DIM) - 2;
    int bytesPerEU = bytesPerSector/numRowInEncMatix;
    int numDataCol = layoutPtr->numDataCol;
#if RF_EO_MATRIX_DIM > 17
    int shortsPerEU = bytesPerEU/sizeof(short);
    short *rrdbuf_current, *pbuf_current, *ebuf_current;
    short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    register short *temp;
    short *P;

    RF_ASSERT( bytesPerEU % sizeof(short) == 0);
    RF_Malloc(P, bytesPerEU, (short *));
    RF_Malloc(temp, bytesPerEU, (short *));
#elif RF_EO_MATRIX_DIM == 17
    int longsPerEU = bytesPerEU/sizeof(long);
    long *rrdbuf_current, *pbuf_current, *ebuf_current;
    long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
    register long *temp;
    long *P;

    RF_ASSERT( bytesPerEU % sizeof(long) == 0);
    RF_Malloc(P, bytesPerEU, (long *));
    RF_Malloc(temp, bytesPerEU, (long *));
#endif
    RF_ASSERT( *((long *)dest[0]) == 0);
    RF_ASSERT( *((long *)dest[1]) == 0);
    bzero((char *)P, bytesPerEU);
    bzero((char *)temp, bytesPerEU);
    RF_ASSERT( *P == 0 );
    /* calculate the 'P' parameter, which, not parity, is the Xor of all elements in
       the last two columns, ie. the 'E' and 'parity' columns, see the Ref. paper by
       Blaum, et al 1993.  The inner loop bound is the per-configuration word count
       of one EU, so it is selected together with the buffer type. */
    for( i=0; i< numRowInEncMatix; i++) {
#if RF_EO_MATRIX_DIM > 17
        for( k=0; k< shortsPerEU; k++) {
            ebuf_current = ((short *)ebuf) + i*shortsPerEU + k;
            pbuf_current = ((short *)pbuf) + i*shortsPerEU + k;
            P[k] ^= *ebuf_current;
            P[k] ^= *pbuf_current;
        }
#elif RF_EO_MATRIX_DIM == 17
        for( k=0; k< longsPerEU; k++) {
            ebuf_current = ((long *)ebuf) + i*longsPerEU + k;
            pbuf_current = ((long *)pbuf) + i*longsPerEU + k;
            P[k] ^= *ebuf_current;
            P[k] ^= *pbuf_current;
        }
#endif
    }
    RF_ASSERT( fcol[0] != fcol[1] );
    /* f1 is the smaller failed column, f2 the larger; pick the matching destinations */
    if( fcol[0] < fcol[1] ) {
#if RF_EO_MATRIX_DIM > 17
        dest_smaller = (short *)(dest[0]);
        dest_larger = (short *)(dest[1]);
#elif RF_EO_MATRIX_DIM == 17
        dest_smaller = (long *)(dest[0]);
        dest_larger = (long *)(dest[1]);
#endif
        f1 = fcol[0];
        f2 = fcol[1];
    }
    else {
#if RF_EO_MATRIX_DIM > 17
        dest_smaller = (short *)(dest[1]);
        dest_larger = (short *)(dest[0]);
#elif RF_EO_MATRIX_DIM == 17
        dest_smaller = (long *)(dest[1]);
        dest_larger = (long *)(dest[0]);
#endif
        f1 = fcol[1];
        f2 = fcol[0];
    }
    row = (RF_EO_MATRIX_DIM) -1;
    while( (row = rf_EO_Mod( (row+f1-f2), RF_EO_MATRIX_DIM )) != ( (RF_EO_MATRIX_DIM) -1) )
    {
#if RF_EO_MATRIX_DIM > 17
        dest_larger_current = dest_larger + row*shortsPerEU;
        dest_smaller_current = dest_smaller + row*shortsPerEU;
#elif RF_EO_MATRIX_DIM == 17
        dest_larger_current = dest_larger + row*longsPerEU;
        dest_smaller_current = dest_smaller + row*longsPerEU;
#endif
        /** Do the diagonal recovery. Initially, temp[k] = (failed 1),
            which is the failed data in the column which has the smaller col index. **/
        /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
        for( j=0; j< numDataCol; j++)
        {
            if( j == f1 || j == f2 ) continue;
            rrdrow = rf_EO_Mod( (row+f2-j), RF_EO_MATRIX_DIM );
            if ( rrdrow != (RF_EO_MATRIX_DIM) -1 ) {
#if RF_EO_MATRIX_DIM > 17
                rrdbuf_current = (short *)(rrdbuf[j]) + rrdrow * shortsPerEU;
                for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
#elif RF_EO_MATRIX_DIM == 17
                rrdbuf_current = (long *)(rrdbuf[j]) + rrdrow * longsPerEU;
                for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current + k);
#endif
            }
        }
        /* step 2: ^E(erow,m-2), If erow is at the bottom row, don't Xor into it
           E(erow,m-2) = (principal diagonal) ^ (failed 1) ^ (failed 2)
                         ^ ( SUM of nonfailed in-diagonal A(rrdrow,0..m-3) )
           After this step, temp[k] = (principal diagonal) ^ (failed 2) */
        erow = rf_EO_Mod( (row+f2-ecol), (RF_EO_MATRIX_DIM) );
        if ( erow != (RF_EO_MATRIX_DIM) -1) {
#if RF_EO_MATRIX_DIM > 17
            ebuf_current = (short *)ebuf + shortsPerEU * erow;
            for (k=0; k< shortsPerEU; k++) temp[k] ^= *(ebuf_current+k);
#elif RF_EO_MATRIX_DIM == 17
            ebuf_current = (long *)ebuf + longsPerEU * erow;
            for (k=0; k< longsPerEU; k++) temp[k] ^= *(ebuf_current+k);
#endif
        }
        /* step 3: ^P to obtain the failed data (failed 2).
           P can be proved to be actually (principal diagonal)
           After this step, temp[k] = (failed 2), the failed data to be recovered */
#if RF_EO_MATRIX_DIM > 17
        for (k=0; k< shortsPerEU; k++) temp[k] ^= P[k];
        /* Put the data to the destination buffer */
        for (k=0; k< shortsPerEU; k++) dest_larger_current[k] = temp[k];
#elif RF_EO_MATRIX_DIM == 17
        for (k=0; k< longsPerEU; k++) temp[k] ^= P[k];
        /* Put the data to the destination buffer */
        for (k=0; k< longsPerEU; k++) dest_larger_current[k] = temp[k];
#endif
        /** THE FOLLOWING DO THE HORIZONTAL XOR **/
        /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data columns */
        for (j=0; j< numDataCol; j++)
        {
            if( j == f1 || j == f2 ) continue;
#if RF_EO_MATRIX_DIM > 17
            rrdbuf_current = (short *)(rrdbuf[j]) + row * shortsPerEU;
            for (k=0; k< shortsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
#elif RF_EO_MATRIX_DIM == 17
            rrdbuf_current = (long *)(rrdbuf[j]) + row * longsPerEU;
            for (k=0; k< longsPerEU; k++) temp[k] ^= *(rrdbuf_current+k);
#endif
        }
        /* step 2: ^A(row,m-1) */
        /* step 3: Put the data to the destination buffer */
#if RF_EO_MATRIX_DIM > 17
        pbuf_current = (short *)pbuf + shortsPerEU * row;
        for (k=0; k< shortsPerEU; k++) temp[k] ^= *(pbuf_current+k);
        for (k=0; k< shortsPerEU; k++) dest_smaller_current[k] = temp[k];
#elif RF_EO_MATRIX_DIM == 17
        pbuf_current = (long *)pbuf + longsPerEU * row;
        for (k=0; k< longsPerEU; k++) temp[k] ^= *(pbuf_current+k);
        for (k=0; k< longsPerEU; k++) dest_smaller_current[k] = temp[k];
#endif
        count++;
    }
    /* Check that every Encoding Unit in the data buffer has been decoded;
       according to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
       this algorithm covers the whole buffer */
    RF_ASSERT( count == numRowInEncMatix );
    RF_Free((char *)P, bytesPerEU);
    RF_Free((char *)temp, bytesPerEU);
}
/***************************************************************************************
* This function is called by double degraded read
* EO_200_CreateReadDAG
*
***************************************************************************************/
int rf_EvenOddDoubleRecoveryFunc(node)
RF_DagNode_t *node;
{
int ndataParam = 0;
int np = node->numParams;
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
int i, prm, sector, nresults = node->numResults;
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
unsigned sosAddr;
int two = 0, mallc_one= 0, mallc_two = 0; /* flags to indicate if memory is allocated */
int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
RF_PhysDiskAddr_t *ppda,*ppda2,*epda,*epda2,*pda, *pda0, *pda1, npda;
RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
char **buf, *ebuf, *pbuf, *dest[2];
long *suoff=NULL, *suend=NULL, *prmToCol=NULL, psuoff, esuoff;
RF_SectorNum_t startSector, endSector;
RF_Etimer_t timer;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_ETIMER_START(timer);
/* Find out the number of parameters which are pdas for data information */
for (i = 0; i<= np; i++)
if( ((RF_PhysDiskAddr_t *)node->params[i].p)->type != RF_PDA_TYPE_DATA) {ndataParam = i ; break; }
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
if (ndataParam != 0 ){
RF_Malloc(suoff, ndataParam*sizeof(long), (long *) );
RF_Malloc(suend, ndataParam*sizeof(long), (long *) );
RF_Malloc(prmToCol, ndataParam*sizeof(long), (long *) );
}
if (asmap->failedPDAs[1] &&
(asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
RF_ASSERT(0); /* currently, no support for this situation */
ppda = node->params[np-6].p;
ppda2 = node->params[np-5].p;
RF_ASSERT( ppda2->type == RF_PDA_TYPE_PARITY );
epda = node->params[np-4].p;
epda2 = node->params[np-3].p;
RF_ASSERT( epda2->type == RF_PDA_TYPE_Q );
two = 1;
}
else {
ppda = node->params[np-4].p;
epda = node->params[np-3].p;
psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
RF_ASSERT( psuoff == esuoff );
}
/*
the followings have three goals:
1. determine the startSector to begin decoding and endSector to end decoding.
2. determine the colume numbers of the two failed disks.
3. determine the offset and end offset of the access within each failed stripe unit.
*/
if( nresults == 1 ) {
/* find the startSector to begin decoding */
pda = node->results[0];
bzero(pda->bufPtr, bytesPerSector*pda->numSector );
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector );
fsuend[0] = fsuoff[0] + pda->numSector;
startSector = fsuoff[0];
endSector = fsuend[0];
/* find out the the column of failed disk being accessed */
fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress );
/* find out the other failed colume not accessed */
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
for (i=0; i < numDataCol; i++) {
npda.raidAddress = sosAddr + (i * secPerSU);
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
/* skip over dead disks */
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
if (i != fcol[0]) break;
}
RF_ASSERT (i < numDataCol);
fcol[1] = i;
}
else {
RF_ASSERT ( nresults == 2 );
pda0 = node->results[0]; bzero(pda0->bufPtr, bytesPerSector*pda0->numSector );
pda1 = node->results[1]; bzero(pda1->bufPtr, bytesPerSector*pda1->numSector );
/* determine the failed colume numbers of the two failed disks. */
fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress );
fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress );
/* determine the offset and end offset of the access within each failed stripe unit. */
fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector );
fsuend[0] = fsuoff[0] + pda0->numSector;
fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector );
fsuend[1] = fsuoff[1] + pda1->numSector;
/* determine the startSector to begin decoding */
startSector = RF_MIN( pda0->startSector, pda1->startSector );
/* determine the endSector to end decoding */
endSector = RF_MAX( fsuend[0], fsuend[1] );
}
/*
assign the beginning sector and the end sector for each parameter
find out the corresponding colume # for each parameter
*/
for( prm=0; prm < ndataParam; prm++ ) {
pda = node->params[prm].p;
suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
suend[prm] = suoff[prm] + pda->numSector;
prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress );
}
/* 'sector' is the sector for the current decoding algorithm. For each sector in the failed SU,
find out the corresponding parameters that cover the current sector and that are needed for
decoding of this sector in failed SU. 2. Find out if sector is in the shadow of any accessed
failed SU. If not, malloc a temporary space of a sector in size.
*/
for( sector = startSector; sector < endSector; sector++ ){
if ( nresults == 2 )
if( !(fsuoff[0]<=sector && sector<fsuend[0]) && !(fsuoff[1]<=sector && sector<fsuend[1]) )continue;
for( prm=0; prm < ndataParam; prm++ )
if( suoff[prm] <= sector && sector < suend[prm] )
buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)node->params[prm].p)->bufPtr +
rf_RaidAddressToByte(raidPtr, sector-suoff[prm]);
/* find out if sector is in the shadow of any accessed failed SU. If yes, assign dest[0], dest[1] to point
at suitable position of the buffer corresponding to failed SUs. if no, malloc a temporary space of
a sector in size for destination of decoding.
*/
RF_ASSERT( nresults == 1 || nresults == 2 );
if ( nresults == 1) {
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
/* Always malloc temp buffer to dest[1] */
RF_Malloc( dest[1], bytesPerSector, (char *) );
bzero(dest[1],bytesPerSector); mallc_two = 1; }
else {
if( fsuoff[0] <= sector && sector < fsuend[0] )
dest[0] = ((RF_PhysDiskAddr_t *)node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[0]);
else { RF_Malloc( dest[0], bytesPerSector, (char *) );
bzero(dest[0],bytesPerSector); mallc_one = 1; }
if( fsuoff[1] <= sector && sector < fsuend[1] )
dest[1] = ((RF_PhysDiskAddr_t *)node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector-fsuoff[1]);
else { RF_Malloc( dest[1], bytesPerSector, (char *) );
bzero(dest[1],bytesPerSector); mallc_two = 1; }
RF_ASSERT( mallc_one == 0 || mallc_two == 0 );
}
pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-psuoff );
ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector-esuoff );
/*
* After finish finding all needed sectors, call doubleEOdecode function for decoding
* one sector to destination.
*/
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
/* free all allocated memory, and mark flag to indicate no memory is being allocated */
if( mallc_one == 1) RF_Free( dest[0], bytesPerSector );
if( mallc_two == 1) RF_Free( dest[1], bytesPerSector );
mallc_one = mallc_two = 0;
}
RF_Free(buf, numDataCol*sizeof(char *));
if (ndataParam != 0){
RF_Free(suoff, ndataParam*sizeof(long));
RF_Free(suend, ndataParam*sizeof(long));
RF_Free(prmToCol, ndataParam*sizeof(long));
}
RF_ETIMER_STOP(timer);
RF_ETIMER_EVAL(timer);
if (tracerec) {
tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
rf_GenericWakeupFunc(node,0);
#if 1
return(0); /* XXX is this even close!!?!?!!? GO */
#endif
}
/* currently, only access of one of the two failed SU is allowed in this function.
* also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
* many accesses of single stripe unit.
*/
int rf_EOWriteDoubleRecoveryFunc(node)
RF_DagNode_t *node;
{
int np = node->numParams;
RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np-1].p;
RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-2].p;
RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
RF_SectorNum_t sector;
RF_RowCol_t col, scol;
int prm, i, j;
RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
unsigned sosAddr;
unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1 );
RF_int64 numbytes;
RF_SectorNum_t startSector, endSector;
RF_PhysDiskAddr_t *ppda,*epda,*pda, *fpda, npda;
RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
char **buf; /* buf[0], buf[1], buf[2], ...etc. point to buffer storing data read from col0, col1, col2 */
char *ebuf, *pbuf, *dest[2], *olddata[2];
RF_Etimer_t timer;
RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
RF_ASSERT( asmap->numDataFailed == 1 ); /* currently only support this case, the other failed SU is not being accessed */
RF_ETIMER_START(timer);
RF_Malloc(buf, numDataCol*sizeof(char *), (char **));
ppda = node->results[0]; /* Instead of being buffers, node->results[0] and [1] are Ppda and Epda */
epda = node->results[1];
fpda = asmap->failedPDAs[0];
/* First, recovery the failed old SU using EvenOdd double decoding */
/* determine the startSector and endSector for decoding */
startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector );
endSector = startSector + fpda->numSector;
/* Assign buf[col] pointers to point to each non-failed colume and initialize the pbuf
and ebuf to point at the beginning of each source buffers and destination buffers */
for( prm=0; prm < numDataCol-2; prm++ ) {
pda = (RF_PhysDiskAddr_t *)node->params[prm].p;
col = rf_EUCol(layoutPtr, pda->raidAddress );
buf[col] = pda->bufPtr;
}
/* pbuf and ebuf: they will change values as double recovery decoding goes on */
pbuf = ppda->bufPtr;
ebuf = epda->bufPtr;
/* find out the logical colume numbers in the encoding matrix of the two failed columes */
fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress );
/* find out the other failed colume not accessed this time */
sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
for (i=0; i < numDataCol; i++) {
npda.raidAddress = sosAddr + (i * secPerSU);
(raidPtr->Layout.map->MapSector)(raidPtr, npda.raidAddress, &(npda.row), &(npda.col), &(npda.startSector), 0);
/* skip over dead disks */
if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
if (i != fcol[0]) break;
}
RF_ASSERT (i < numDataCol);
fcol[1] = i;
/* assign temporary space to put recovered failed SU */
numbytes = fpda->numSector * bytesPerSector;
RF_Malloc(olddata[0], numbytes, (char *) );
RF_Malloc(olddata[1], numbytes, (char *) );
dest[0] = olddata[0];
dest[1] = olddata[1];
bzero(olddata[0], numbytes);
bzero(olddata[1], numbytes);
/* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] have already
pointed at the beginning of each source buffers and destination buffers */
for( sector = startSector, i=0; sector < endSector; sector++ , i++){
rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf );
for (j=0; j < numDataCol; j++)
if( ( j != fcol[0]) && ( j != fcol[1] ) ) buf[j] += bytesPerSector;
dest[0] += bytesPerSector;
dest[1] += bytesPerSector;
ebuf += bytesPerSector;
pbuf += bytesPerSector;
}
/* after recovery, the buffer pointed by olddata[0] is the old failed data.
With new writing data and this old data, use small write to calculate
the new redundant informations
*/
/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of Rrd;
params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ;
params[ PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1]
are Pdas of wudNodes;
For current implementation, we assume the simplest case:
asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 ie. PDAPerDisk = 1
then node->params[numDataCol] must be the new data to be writen to the failed disk. We first bxor the new data
into the old recovered data, then do the same things as small write.
*/
rf_bxor( ((RF_PhysDiskAddr_t *)node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
/* do new 'E' calculation */
/* find out the corresponding colume in encoding matrix for write colume to be encoded into redundant disk 'E' */
scol = rf_EUCol(layoutPtr, fpda->raidAddress );
/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest buffer pointer */
rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
/* do new 'P' calculation */
rf_bxor( olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
/* Free the allocated buffer */
RF_Free( olddata[0], numbytes );
RF_Free( olddata[1], numbytes );
RF_Free( buf, numDataCol*sizeof(char *));
RF_ETIMER_STOP(timer);
RF_ETIMER_EVAL(timer);
if (tracerec) {
tracerec->q_us += RF_ETIMER_VAL_US(timer);
}
rf_GenericWakeupFunc(node,0);
return(0);
}

View File

@ -0,0 +1,76 @@
/* $NetBSD: rf_evenodd_dagfuncs.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* rf_evenodd_dagfuncs.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chang-Ming Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifndef _RF__RF_EVENODD_DAGFUNCS_H_
#define _RF__RF_EVENODD_DAGFUNCS_H_
/* Redundancy-function tables referenced by the EvenOdd DAG creation code. */
extern RF_RedFuncs_t rf_EOSmallWriteEFuncs;
extern RF_RedFuncs_t rf_EOSmallWritePFuncs;
extern RF_RedFuncs_t rf_eoERecoveryFuncs;
extern RF_RedFuncs_t rf_eoPRecoveryFuncs;

/* DAG node functions: each takes the node being executed and returns an int. */
int rf_RegularPEFunc(RF_DagNode_t *node);
int rf_RegularONEFunc(RF_DagNode_t *node);
int rf_SimpleONEFunc(RF_DagNode_t *node);
void rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf);
int rf_RegularEFunc(RF_DagNode_t *node);
void rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf);
int rf_Degraded_100_EOFunc(RF_DagNode_t *node);

/* Encoding helpers: fold source data of logical column srcLogicCol into a destination buffer. */
void rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
    RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector);
void rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol,
    char *srcbuf, RF_RowCol_t destLogicCol, char *destbuf, int numSector);
int rf_RecoveryEFunc(RF_DagNode_t *node);
int rf_EO_DegradedWriteEFunc(RF_DagNode_t *node);

/*
 * Double-failure decoding.  fcol[0] and fcol[1] are the two failed logical columns;
 * dest[0] and dest[1] receive the decoded data for them.
 */
void rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
    RF_RowCol_t *fcol, char *pbuf, char *ebuf);
int rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node);
int rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node);
/*
 * rf_EUCol: index of the stripe unit, within its stripe, that contains the
 * RAID address _addr_ (address modulo dataSectorsPerStripe, divided by
 * sectorsPerStripeUnit).
 * The whole expansion is parenthesized so the macro can be used safely inside
 * a larger expression.
 */
#define rf_EUCol(_layoutPtr_, _addr_ ) \
  ( ( (_addr_)%( (_layoutPtr_)->dataSectorsPerStripe ) )/((_layoutPtr_)->sectorsPerStripeUnit) )
/*
 * rf_EO_Mod: _int1_ modulo _int2_, with a negative _int1_ first shifted up by
 * _int2_.  NOTE: the result is non-negative only for _int1_ >= -_int2_.
 */
#define rf_EO_Mod( _int1_, _int2_ ) \
  ( ((_int1_) < 0)? (((_int1_)+(_int2_))%(_int2_)) : (_int1_)%(_int2_) )
/*
 * rf_OffsetOfNextEUBoundary: smallest multiple of sec_per_eu that is strictly
 * greater than _offset_ (for non-negative integer operands).  Fully
 * parenthesized for the same reason as rf_EUCol.
 */
#define rf_OffsetOfNextEUBoundary(_offset_, sec_per_eu) \
  ( ((_offset_)/(sec_per_eu) + 1)*(sec_per_eu) )
#define RF_EO_MATRIX_DIM 17
/*
* RF_EO_MATRIX_DIM should be a prime number, and "bytesPerSector" should be
* divisible by (RF_EO_MATRIX_DIM - 1) so that a sector is fully encoded and
* its space is fully utilized.  This number could also be 17.  The latter
* case does not apply to disk arrays with more than 17 columns in total.
*/
#endif /* !_RF__RF_EVENODD_DAGFUNCS_H_ */

View File

@ -0,0 +1,198 @@
/* $NetBSD: rf_evenodd_dags.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* rf_evenodd_dags.c
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chang-Ming Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include "rf_archs.h"
#if RF_INCLUDE_EVENODD > 0
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_dagutils.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_general.h"
#include "rf_evenodd_dags.h"
#include "rf_evenodd.h"
#include "rf_evenodd_dagfuncs.h"
#include "rf_pq.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagffwr.h"
/*
 * Failure mode 100: one data unit is lost.
 * The missing data is reconstructed from P, so the generic degraded-read DAG
 * is built with the P recovery function table.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG)
{
  rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_eoPRecoveryFuncs);
}
/*
 * Failure mode 101: a data unit and E are lost.
 * P is still available, so the missing data is reconstructed from P through
 * the generic degraded-read DAG.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG)
{
  rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_eoPRecoveryFuncs);
}
/*
 * Failure mode 110: a data unit and P are lost.
 * E is made to look like P (the two asmap pointers are exchanged) and the
 * E recovery functions take the place of xor, so the ordinary degraded-read
 * DAG can be used.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG)
{
  /* exchange the P and E pointers to fake out the DegradedReadDAG code */
  RF_PhysDiskAddr_t *origParity = asmap->parityInfo;

  asmap->parityInfo = asmap->qInfo;
  asmap->qInfo = origParity;
  rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_eoERecoveryFuncs);
}
/*
 * Two data units are lost: delegate to the EvenOdd double-degraded read
 * DAG builder, rf_EO_DoubleDegRead.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG)
{
  rf_EO_DoubleDegRead(raidPtr, asmap, dag_h,
      bp, flags, allocList);
}
/*
 * Failure mode 200: two data units are lost.
 * Simply forwards to rf_EOCreateDoubleDegradedReadDAG.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG)
{
  rf_EOCreateDoubleDegradedReadDAG(raidPtr, asmap, dag_h,
      bp, flags, allocList);
}
/*
 * Failure mode 100 (presumably one data unit lost, as for the read DAG of
 * the same number): simple degraded write with two redundancy units,
 * computed by rf_Degraded_100_EOFunc.
 * Panics unless the access touches exactly one stripe unit or the failed
 * pda covers a whole stripe unit.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG)
{
  if (!(asmap->numStripeUnitsAccessed == 1 ||
        asmap->failedPDAs[0]->numSector == raidPtr->Layout.sectorsPerStripeUnit)) {
    RF_PANIC();
  }
  rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 2, (int (*)(RF_DagNode_t *))rf_Degraded_100_EOFunc, RF_TRUE);
}
/*
 * Failure mode 001: E is dead.  Small write.
 * Only P is maintained: the common small-write DAG is built with the EvenOdd
 * P function table and no second redundancy table.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG)
{
  rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_EOSmallWritePFuncs, NULL);
}
/*
 * Failure mode 001: E is dead.  Large write.
 * The common large-write DAG is built with a single redundancy unit,
 * computed by rf_RegularPFunc.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG)
{
  rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 1, rf_RegularPFunc, RF_TRUE);
}
/*
 * Failure mode 010: P is dead.  Small write.
 * E and P are exchanged in the asmap so the single-redundancy small-write
 * code can be reused, with the E small-write function table.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG)
{
  /* exchange the P and E pointers to fake out the common small-write code */
  RF_PhysDiskAddr_t *origParity = asmap->parityInfo;

  asmap->parityInfo = asmap->qInfo;
  asmap->qInfo = origParity;
  rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_EOSmallWriteEFuncs, NULL);
}
/*
 * Failure mode 010: P is dead.  Large write.
 * E and P are exchanged in the asmap so the single-redundancy large-write
 * code can be reused, with rf_RegularEFunc computing the redundancy.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG)
{
  /* exchange the P and E pointers to fake out the code */
  RF_PhysDiskAddr_t *origParity = asmap->parityInfo;

  asmap->parityInfo = asmap->qInfo;
  asmap->qInfo = origParity;
  rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 1, rf_RegularEFunc, RF_FALSE);
}
/*
 * Failure mode 011 (presumably both P and E are dead): the data is written
 * with the non-redundant write DAG.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG)
{
  rf_CreateNonRedundantWriteDAG(raidPtr, asmap, dag_h, bp,
      flags, allocList, RF_IO_TYPE_WRITE);
}
/*
 * Failure mode 110 (a data unit and P lost, as for the read DAG of the same
 * number): P and E are exchanged in the asmap and the simple degraded-write
 * DAG is built with one redundancy unit, computed by rf_EO_DegradedWriteEFunc.
 * Panics unless the access touches exactly one stripe unit or the failed
 * pda covers a whole stripe unit.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG)
{
  RF_PhysDiskAddr_t *origParity;

  if (!(asmap->numStripeUnitsAccessed == 1 ||
        asmap->failedPDAs[0]->numSector == raidPtr->Layout.sectorsPerStripeUnit)) {
    RF_PANIC();
  }

  /* exchange P and E to fake out the parity code */
  origParity = asmap->parityInfo;
  asmap->parityInfo = asmap->qInfo;
  asmap->qInfo = origParity;

  /* open question from the original author: is the regular E func the right one to call? */
  rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 1, (int (*)(RF_DagNode_t *))rf_EO_DegradedWriteEFunc, RF_FALSE);
}
/*
 * Failure mode 101 (a data unit and E lost, as for the read DAG of the same
 * number): simple degraded write with one redundancy unit, computed by
 * rf_RecoveryXorFunc.
 * Panics unless the access touches exactly one stripe unit or the failed
 * pda covers a whole stripe unit.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG)
{
  if (!(asmap->numStripeUnitsAccessed == 1 ||
        asmap->failedPDAs[0]->numSector == raidPtr->Layout.sectorsPerStripeUnit)) {
    RF_PANIC();
  }
  rf_CommonCreateSimpleDegradedWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 1, rf_RecoveryXorFunc, RF_TRUE);
}
/*
 * Build the double-degraded read DAG for EvenOdd: the generic
 * rf_DoubleDegRead builder is given the node names "Re" and "EvenOddRecovery"
 * and rf_EvenOddDoubleRecoveryFunc as the recovery function.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead)
{
  rf_DoubleDegRead(raidPtr, asmap, dag_h, bp, flags, allocList, "Re",
      "EvenOddRecovery", rf_EvenOddDoubleRecoveryFunc);
}
/*
 * Small write maintaining both redundancy units: the common small-write DAG
 * is built with rf_pFuncs and the EvenOdd E small-write functions.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG)
{
  rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, &rf_pFuncs, &rf_EOSmallWriteEFuncs);
}
/*
 * Large write maintaining both redundancy units: the common large-write DAG
 * is built with two redundancy units, computed by rf_RegularPEFunc.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG)
{
  rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
      allocList, 2, rf_RegularPEFunc, RF_FALSE);
}
/*
 * Failure mode 200 (two data units lost, as for the read DAG of the same
 * number): the generic double-degraded small-write DAG is built with the
 * node names "Re", "We" and "EOWrDDRecovery" and
 * rf_EOWriteDoubleRecoveryFunc as the recovery function.
 */
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG)
{
  rf_DoubleDegSmallWrite(raidPtr, asmap, dag_h, bp, flags, allocList,
      "Re", "We", "EOWrDDRecovery", rf_EOWriteDoubleRecoveryFunc);
}
#endif /* RF_INCLUDE_EVENODD > 0 */

View File

@ -0,0 +1,63 @@
/* $NetBSD: rf_evenodd_dags.h,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* rf_evenodd_dags.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chang-Ming Wu
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifndef _RF__RF_EVENODD_DAGS_H_
#define _RF__RF_EVENODD_DAGS_H_
#include "rf_types.h"
#if RF_UTILITY == 0
#include "rf_dag.h"
/*
* Declarations of the EvenOdd (EO) DAG creation functions, one per failure
* mode.  The list was swiped from rf_pqdeg.h.
* Every name is declared through RF_CREATE_DAG_FUNC_DECL, the same macro that
* is used to define the function in rf_evenodd_dags.c.
*/
/* read DAGs */
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateReadDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateReadDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateReadDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateReadDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateDoubleDegradedReadDAG);
/* write DAGs for the individual failure modes */
RF_CREATE_DAG_FUNC_DECL(rf_EO_100_CreateWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateSmallWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateSmallWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_010_CreateLargeWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_001_CreateLargeWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_011_CreateWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_110_CreateWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_101_CreateWriteDAG);
/* double-degraded read builder, and the small/large write DAGs that do not carry a failure-mode number */
RF_CREATE_DAG_FUNC_DECL(rf_EO_DoubleDegRead);
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateSmallWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EOCreateLargeWriteDAG);
RF_CREATE_DAG_FUNC_DECL(rf_EO_200_CreateWriteDAG);
#endif /* RF_UTILITY == 0 */
#endif /* !_RF__RF_EVENODD_DAGS_H_ */

370
sys/dev/raidframe/rf_fifo.c Normal file
View File

@ -0,0 +1,370 @@
/* $NetBSD: rf_fifo.c,v 1.1 1998/11/13 04:20:29 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/***************************************************
*
* rf_fifo.c -- prioritized fifo queue code.
* There are only two priority levels: hi and lo.
*
* Aug 4, 1994, adapted from raidSim version (MCH)
*
***************************************************/
/*
* :
* Log: rf_fifo.c,v
* Revision 1.20 1996/06/18 20:53:11 jimz
* fix up disk queueing (remove configure routine,
* add shutdown list arg to create routines)
*
* Revision 1.19 1996/06/14 00:08:21 jimz
* make happier in all environments
*
* Revision 1.18 1996/06/13 20:41:24 jimz
* add random queueing
*
* Revision 1.17 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.16 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.15 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.14 1996/06/06 01:15:02 jimz
* added debugging
*
* Revision 1.13 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.12 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.11 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.10 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.9 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.8 1995/12/01 18:22:15 root
* added copyright info
*
* Revision 1.7 1995/11/07 15:32:16 wvcii
* added function FifoPeek()
*
*/
#include "rf_types.h"
#include "rf_alloclist.h"
#include "rf_stripelocks.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_fifo.h"
#include "rf_debugMem.h"
#include "rf_general.h"
#include "rf_threadid.h"
#include "rf_options.h"
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
#include "rf_randmacros.h"
RF_DECLARE_STATIC_RANDOM
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
/*
 * Create a fifo queue: allocate a zeroed header (via calloc), register it on
 * clList, and hand it back as an opaque pointer.
 * sectPerDisk and listp are not used by this routine.
 */
/*ARGSUSED*/
void *rf_FifoCreate(sectPerDisk, clList, listp)
  RF_SectorCount_t sectPerDisk;
  RF_AllocListElem_t *clList;
  RF_ShutdownList_t **listp;
{
  RF_FifoHeader_t *q;

#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
  RF_INIT_STATIC_RANDOM(1);
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */

  RF_CallocAndAdd(q, 1, sizeof(RF_FifoHeader_t), (RF_FifoHeader_t *), clList);

  /* both priority queues start out empty */
  q->lq_count = 0;
  q->hq_count = 0;

#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
  /* seed value later consumed by the random peek/dequeue routines */
  q->rval = (long)RF_STATIC_RANDOM();
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */

  return((void *)q);
}
/*
 * Append elem to the tail of the queue selected by priority:
 * RF_IO_NORMAL_PRIORITY goes to the hi queue, RF_IO_LOW_PRIORITY to the lo
 * queue.  Afterwards the combined length of both queues must equal
 * elem->queue->queueLength; a mismatch is reported and then asserted.
 */
void rf_FifoEnqueue(q_in, elem, priority)
  void *q_in;
  RF_DiskQueueData_t *elem;
  int priority;
{
  RF_FifoHeader_t *q = (RF_FifoHeader_t *)q_in;

  RF_ASSERT(priority == RF_IO_NORMAL_PRIORITY || priority == RF_IO_LOW_PRIORITY);

  elem->next = NULL;
  if (priority != RF_IO_NORMAL_PRIORITY) {
    /* low-priority queue */
    RF_ASSERT(elem->next == NULL);
    if (rf_fifoDebug) {
      int tid;
      rf_get_threadid(tid);
      printf("[%d] fifo: ENQ lopri\n", tid);
    }
    if (q->lq_tail) {
      /* non-empty queue: link behind the current tail */
      RF_ASSERT(q->lq_count != 0 && q->lq_head != NULL);
      q->lq_tail->next = elem;
      q->lq_tail = elem;
    } else {
      /* empty queue: elem becomes both head and tail */
      RF_ASSERT(q->lq_count == 0 && q->lq_head == NULL);
      q->lq_head = q->lq_tail = elem;
    }
    q->lq_count++;
  }
  else {
    /* normal-priority queue */
    if (q->hq_tail) {
      RF_ASSERT(q->hq_count != 0 && q->hq_head != NULL);
      q->hq_tail->next = elem;
      q->hq_tail = elem;
    } else {
      RF_ASSERT(q->hq_count == 0 && q->hq_head == NULL);
      q->hq_head = q->hq_tail = elem;
    }
    q->hq_count++;
  }

  /* report a disagreement with the owning disk queue before asserting on it */
  if ((q->hq_count + q->lq_count)!= elem->queue->queueLength) {
    printf("Queue lengths differ!: %d %d %d\n",
        q->hq_count, q->lq_count, (int)elem->queue->queueLength);
    printf("%d %d %d %d\n",
        (int)elem->queue->numOutstanding,
        (int)elem->queue->maxOutstanding,
        (int)elem->queue->row,
        (int)elem->queue->col);
  }
  RF_ASSERT((q->hq_count + q->lq_count) == elem->queue->queueLength);
}
/*
 * Remove and return the element at the head of the hi-priority queue or,
 * when that queue is empty, at the head of the lo-priority queue.
 * Returns NULL when both queues are empty.
 */
RF_DiskQueueData_t *rf_FifoDequeue(q_in)
  void *q_in;
{
  RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
  RF_DiskQueueData_t *item;

  RF_ASSERT(q);

  if (q->hq_head) {
    RF_ASSERT(q->hq_count != 0 && q->hq_tail != NULL);
    item = q->hq_head;
    q->hq_head = item->next;
    if (q->hq_head == NULL)
      q->hq_tail = NULL;        /* the queue just became empty */
    item->next = NULL;
    q->hq_count--;
    return(item);
  }

  if (q->lq_head) {
    RF_ASSERT(q->lq_count != 0 && q->lq_tail != NULL);
    item = q->lq_head;
    q->lq_head = item->next;
    if (q->lq_head == NULL)
      q->lq_tail = NULL;        /* the queue just became empty */
    item->next = NULL;
    q->lq_count--;
    if (rf_fifoDebug) {
      int tid;
      rf_get_threadid(tid);
      printf("[%d] fifo: DEQ lopri %lx\n", tid, (long)item);
    }
    return(item);
  }

  /* nothing queued at either priority */
  RF_ASSERT(q->hq_count == 0 && q->lq_count == 0 && q->hq_tail == NULL && q->lq_tail == NULL);
  return(NULL);
}
/* This never gets used!! No loss (I hope) if we don't include it... GO */
#if !defined(__NetBSD__) && !defined(_KERNEL)
/*
 * Return the element at position n (counting from 0) of the list that starts
 * at *headp; the element is asserted to exist.  When deq is non-zero the
 * element is also unlinked: *headp / *tailp are updated as needed and *countp
 * is decremented.  The next pointer of the returned element is left as is.
 */
static RF_DiskQueueData_t *n_in_q(headp, tailp, countp, n, deq)
  RF_DiskQueueData_t **headp;
  RF_DiskQueueData_t **tailp;
  int *countp;
  int n;
  int deq;
{
  RF_DiskQueueData_t *r = *headp;   /* element under the cursor */
  RF_DiskQueueData_t *prev = NULL;  /* its predecessor, NULL at the head */
  int skip = n;                     /* elements still to step over */

  while (r && skip != 0) {
    skip--;
    prev = r;
    r = r->next;
  }
  RF_ASSERT(r != NULL);

  if (deq != 0) {
    if (prev)
      prev->next = r->next;
    else
      *headp = r->next;
    if (*tailp == r)
      *tailp = prev;
    (*countp)--;
  }
  return(r);
}
#endif
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
/*
 * Return, without dequeueing it, the element that the current q->rval selects:
 * position (rval % count) of the hi-priority queue or, when that queue is
 * empty, of the lo-priority queue.  Returns NULL when both queues are empty.
 * q->rval is not changed here.
 */
RF_DiskQueueData_t *rf_RandomPeek(q_in)
  void *q_in;
{
  RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
  RF_DiskQueueData_t *req;
  int n;

  if (q->hq_head) {
    n = q->rval % q->hq_count;
    req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, n, 0);
  }
  else {
    RF_ASSERT(q->hq_count == 0);
    if (q->lq_head == NULL) {
      RF_ASSERT(q->lq_count == 0);
      return(NULL);
    }
    n = q->rval % q->lq_count;
    req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, n, 0);
  }
  /* check req for NULL before the next assertion dereferences it */
  RF_ASSERT(req != NULL);
  RF_ASSERT((q->hq_count + q->lq_count) == req->queue->queueLength);
  return(req);
}
/*
 * Remove and return the element that the current q->rval selects: position
 * (rval % count) of the hi-priority queue or, when that queue is empty, of
 * the lo-priority queue.  A fresh q->rval is drawn for the next call.
 * Returns NULL when both queues are empty.
 */
RF_DiskQueueData_t *rf_RandomDequeue(q_in)
  void *q_in;
{
  RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
  RF_DiskQueueData_t *req;
  int pick;

  if (q->hq_head == NULL) {
    RF_ASSERT(q->hq_count == 0);
    if (q->lq_head == NULL) {
      /* both queues are empty */
      RF_ASSERT(q->lq_count == 0);
      return(NULL);
    }
    pick = q->rval % q->lq_count;
    q->rval = (long)RF_STATIC_RANDOM();
    req = n_in_q(&q->lq_head, &q->lq_tail, &q->lq_count, pick, 1);
  }
  else {
    pick = q->rval % q->hq_count;
    q->rval = (long)RF_STATIC_RANDOM();
    req = n_in_q(&q->hq_head, &q->hq_tail, &q->hq_count, pick, 1);
  }

  /* the fifo now holds one element fewer than queueLength reports */
  RF_ASSERT((q->hq_count + q->lq_count) == (req->queue->queueLength-1));
  return(req);
}
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
/*
 * Look at the request that would be dequeued next without removing it:
 * the head of the high priority list if there is one, otherwise the head
 * of the low priority list.  Returns NULL when both lists are empty.
 */
RF_DiskQueueData_t *rf_FifoPeek(void *q_in)
{
	RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;

	RF_ASSERT(q);
	if (q->hq_head != NULL)
		return(q->hq_head);
	/* lq_head is NULL when the low priority list is empty as well */
	return(q->lq_head);
}
/*
 * Promote low priority accesses to regular priority.  Currently this is
 * only needed when the user wants to write a stripe that is under
 * reconstruction.
 *
 * Every entry of the low priority list whose parityStripeID and which_ru
 * both match the arguments is unlinked from that list and appended to the
 * end of the high priority list.  The queue is assumed to be locked on
 * entry.
 *
 * Returns the number of entries promoted.
 */
int rf_FifoPromote(q_in, parityStripeID, which_ru)
  void *q_in;
  RF_StripeNum_t parityStripeID;
  RF_ReconUnitNum_t which_ru;
{
	RF_FifoHeader_t *q = (RF_FifoHeader_t *) q_in;
	RF_DiskQueueData_t *cur;		/* low-pri entry being examined */
	RF_DiskQueueData_t *prev = NULL;	/* last low-pri entry we kept */
	RF_DiskQueueData_t *succ;		/* successor, saved before unlinking */
	int retval = 0;

	for (cur = q->lq_head; cur != NULL; cur = succ) {
		succ = cur->next;

		if (cur->parityStripeID != parityStripeID || cur->which_ru != which_ru) {
			/* not ours: it stays, and becomes the new trailer */
			prev = cur;
			continue;
		}

		/* unlink cur from the low priority list */
		if (prev != NULL)
			prev->next = succ;
		else
			q->lq_head = succ;
		if (q->lq_head == NULL)
			q->lq_tail = NULL;	/* that was the only entry */
		else if (q->lq_tail == cur)
			q->lq_tail = prev;	/* that was the tail entry */
		cur->next = NULL;
		q->lq_count--;

		/* append cur to the high priority list */
		if (q->hq_tail != NULL)
			q->hq_tail->next = cur;
		else
			q->hq_head = cur;
		q->hq_tail = cur;
		q->hq_count++;

		retval++;
	}

	/* sanity check.  delete this if you ever put more than one entry in the low-pri queue */
	RF_ASSERT(retval == 0 || retval == 1);
	if (rf_fifoDebug) {
		int tid;
		rf_get_threadid(tid);
		printf("[%d] fifo: promote %d\n", tid, retval);
	}
	return(retval);
}

114
sys/dev/raidframe/rf_fifo.h Normal file
View File

@ -0,0 +1,114 @@
/* $NetBSD: rf_fifo.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_fifo.h -- prioritized FIFO queue code.
*
* 4-9-93 Created (MCH)
*/
/*
* :
* Log: rf_fifo.h,v
* Revision 1.12 1996/06/18 20:53:11 jimz
* fix up disk queueing (remove configure routine,
* add shutdown list arg to create routines)
*
* Revision 1.11 1996/06/13 20:41:28 jimz
* add random queueing
*
* Revision 1.10 1996/06/13 20:38:28 jimz
* add random dequeue, peek
*
* Revision 1.9 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.8 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.7 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.6 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.5 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.4 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.3 1995/12/01 18:22:26 root
* added copyright info
*
* Revision 1.2 1995/11/07 15:31:57 wvcii
* added Peek() function
*
*/
#ifndef _RF__RF_FIFO_H_
#define _RF__RF_FIFO_H_
#include "rf_archs.h"
#include "rf_types.h"
#include "rf_diskqueue.h"
/*
 * Header of a two-level FIFO disk queue: one singly linked list of
 * high priority requests and one of low priority requests, each with a
 * head pointer, a tail pointer and an entry count.
 */
typedef struct RF_FifoHeader_s {
	RF_DiskQueueData_t *hq_head;	/* high priority requests: first entry */
	RF_DiskQueueData_t *hq_tail;	/* high priority requests: last entry */
	RF_DiskQueueData_t *lq_head;	/* low priority requests: first entry */
	RF_DiskQueueData_t *lq_tail;	/* low priority requests: last entry */
	int hq_count;			/* entries on the high priority list (debug; also used by the random policy) */
	int lq_count;			/* entries on the low priority list (debug; also used by the random policy) */
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
	long rval;			/* next random number (random qpolicy) */
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
} RF_FifoHeader_t;
/*
 * Queue operations.  All of them take the queue as an opaque void *
 * that is really an RF_FifoHeader_t *.
 */
/* Create a queue header (NOTE(review): implementation not visible here; see rf_fifo.c). */
extern void *rf_FifoCreate(RF_SectorCount_t sectPerDisk,
RF_AllocListElem_t *clList, RF_ShutdownList_t **listp);
/* Add elem to the queue at the given priority (NOTE(review): see rf_fifo.c for the priority encoding). */
extern void rf_FifoEnqueue(void *q_in, RF_DiskQueueData_t *elem,
int priority);
/* Remove and return the next request, or NULL when both lists are empty. */
extern RF_DiskQueueData_t *rf_FifoDequeue(void *q_in);
/* Return the request rf_FifoDequeue would return next, without removing it. */
extern RF_DiskQueueData_t *rf_FifoPeek(void *q_in);
/*
 * Move low priority requests matching (parityStripeID, which_ru) to the
 * end of the high priority list; returns how many were moved.
 */
extern int rf_FifoPromote(void *q_in, RF_StripeNum_t parityStripeID,
RF_ReconUnitNum_t which_ru);
#if !defined(KERNEL) && RF_INCLUDE_QUEUE_RANDOM > 0
/* Random-policy variants: pick entry (rval % count) instead of the head. */
extern RF_DiskQueueData_t *rf_RandomDequeue(void *q_in);
extern RF_DiskQueueData_t *rf_RandomPeek(void *q_in);
#endif /* !KERNEL && RF_INCLUDE_QUEUE_RANDOM > 0 */
#endif /* !_RF__RF_FIFO_H_ */

View File

@ -0,0 +1,733 @@
/* $NetBSD: rf_freelist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* rf_freelist.h
*/
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_freelist.h,v
* Revision 1.13 1996/06/10 12:50:57 jimz
* Add counters to freelists to track number of allocations, frees,
* grows, max size, etc. Adjust a couple sets of PRIME params based
* on the results.
*
* Revision 1.12 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.11 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.10 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.9 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.8 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.7 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.6 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.5 1996/05/20 16:16:12 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.4 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.3 1996/05/16 16:04:52 jimz
* allow init func to fail for FREELIST ops
*
* Revision 1.2 1996/05/16 14:54:08 jimz
* added _INIT and _CLEAN versions of ops for objects with
* internal allocations
*
* Revision 1.1 1996/05/15 23:37:53 jimz
* Initial revision
*
*/
/*
* rf_freelist.h -- code to manage counted freelists
*
* Keep an arena of fixed-size objects. When a new object is needed,
* allocate it as necessary. When an object is freed, either put it
* in the arena, or really free it, depending on the maximum arena
* size.
*/
#ifndef _RF__RF_FREELIST_H_
#define _RF__RF_FREELIST_H_
#include "rf_types.h"
#include "rf_debugMem.h"
#include "rf_general.h"
#include "rf_threadstuff.h"
/*
 * Set RF_FREELIST_STATS to a value greater than zero to compile in
 * per-freelist usage counters.  With the default of 0 every
 * RF_FREELIST_STAT_* macro below expands to nothing.
 */
#define RF_FREELIST_STATS 0
#if RF_FREELIST_STATS > 0
/* Usage counters kept inside each freelist when statistics are enabled. */
typedef struct RF_FreeListStats_s {
char *file; /* __FILE__ at the point RF_FREELIST_STAT_INIT was expanded */
int line; /* __LINE__ at the point RF_FREELIST_STAT_INIT was expanded */
int allocations; /* number of RF_FREELIST_STAT_ALLOC events */
int frees; /* number of RF_FREELIST_STAT_FREE events */
int max_free; /* largest free_cnt observed */
int grows; /* number of RF_FREELIST_STAT_GROW events */
int outstanding; /* allocations minus frees */
int max_outstanding; /* largest value of outstanding observed */
} RF_FreeListStats_t;
/* Zero the counters and record where the freelist was set up. */
#define RF_FREELIST_STAT_INIT(_fl_) { \
bzero((char *)&((_fl_)->stats), sizeof(RF_FreeListStats_t)); \
(_fl_)->stats.file = __FILE__; \
(_fl_)->stats.line = __LINE__; \
}
/* Count one allocation and update the outstanding high-water mark. */
#define RF_FREELIST_STAT_ALLOC(_fl_) { \
(_fl_)->stats.allocations++; \
(_fl_)->stats.outstanding++; \
if ((_fl_)->stats.outstanding > (_fl_)->stats.max_outstanding) \
(_fl_)->stats.max_outstanding = (_fl_)->stats.outstanding; \
}
/* Update the free_cnt high-water mark from the freelist's current free_cnt. */
#define RF_FREELIST_STAT_FREE_UPDATE(_fl_) { \
if ((_fl_)->free_cnt > (_fl_)->stats.max_free) \
(_fl_)->stats.max_free = (_fl_)->free_cnt; \
}
/* Count one free and refresh the free_cnt high-water mark. */
#define RF_FREELIST_STAT_FREE(_fl_) { \
(_fl_)->stats.frees++; \
(_fl_)->stats.outstanding--; \
RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
}
/* Count one grow event and refresh the free_cnt high-water mark. */
#define RF_FREELIST_STAT_GROW(_fl_) { \
(_fl_)->stats.grows++; \
RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
}
/* Print all counters for the freelist with printf. */
#define RF_FREELIST_STAT_REPORT(_fl_) { \
printf("Freelist at %s %d (%s)\n", (_fl_)->stats.file, (_fl_)->stats.line, RF_STRING(_fl_)); \
printf(" %d allocations, %d frees\n", (_fl_)->stats.allocations, (_fl_)->stats.frees); \
printf(" %d grows\n", (_fl_)->stats.grows); \
printf(" %d outstanding\n", (_fl_)->stats.outstanding); \
printf(" %d free (max)\n", (_fl_)->stats.max_free); \
printf(" %d outstanding (max)\n", (_fl_)->stats.max_outstanding); \
}
#else /* RF_FREELIST_STATS > 0 */
/* Statistics disabled: the hooks expand to nothing. */
#define RF_FREELIST_STAT_INIT(_fl_)
#define RF_FREELIST_STAT_ALLOC(_fl_)
#define RF_FREELIST_STAT_FREE_UPDATE(_fl_)
#define RF_FREELIST_STAT_FREE(_fl_)
#define RF_FREELIST_STAT_GROW(_fl_)
#define RF_FREELIST_STAT_REPORT(_fl_)
#endif /* RF_FREELIST_STATS > 0 */
/*
 * One counted freelist ("arena") of fixed-size objects.  The free objects
 * are chained through a "next" pointer member of the object itself; the
 * name of that member is passed to each RF_FREELIST_* macro.
 */
struct RF_FreeList_s {
void *objlist; /* list of free obj */
int free_cnt; /* how many free obj (as counted by the PRIME and FREE macros) */
int max_free_cnt; /* max free arena size; FREE really frees once free_cnt reaches it */
int obj_inc; /* how many to allocate at a time */
int obj_size; /* size of objects */
RF_DECLARE_MUTEX(lock)
#if RF_FREELIST_STATS > 0
RF_FreeListStats_t stats; /* statistics */
#endif /* RF_FREELIST_STATS > 0 */
};
/*
 * Allocate and initialize a freelist header.
 *
 * fl = freelist (lvalue; set to the new header, or to NULL on failure)
 * maxcnt = max number of items in arena
 * inc = how many to allocate at a time (must be > 0)
 * size = size of object
 *
 * fl is left NULL if the header cannot be allocated or if its mutex
 * cannot be initialized; in the latter case the header is freed again.
 * The header is only touched (including by the statistics hook) when it
 * was successfully allocated.
 */
#define RF_FREELIST_CREATE(_fl_,_maxcnt_,_inc_,_size_) { \
	int _rc; \
	RF_ASSERT((_inc_) > 0); \
	RF_Malloc(_fl_, sizeof(RF_FreeList_t), (RF_FreeList_t *)); \
	if (_fl_) { \
		(_fl_)->objlist = NULL; \
		(_fl_)->free_cnt = 0; \
		(_fl_)->max_free_cnt = _maxcnt_; \
		(_fl_)->obj_inc = _inc_; \
		(_fl_)->obj_size = _size_; \
		_rc = rf_mutex_init(&(_fl_)->lock); \
		if (_rc) { \
			RF_Free(_fl_, sizeof(RF_FreeList_t)); \
			_fl_ = NULL; \
		} \
		else { \
			RF_FREELIST_STAT_INIT(_fl_); \
		} \
	} \
}
/*
 * Pre-populate the arena with up to cnt freshly allocated objects.
 *
 * fl = freelist
 * cnt = number to prime with
 * nextp = name of "next" pointer in obj
 * cast = object cast
 *
 * Priming stops early, without reporting an error, at the first
 * allocation that fails.  Runs with the freelist mutex held.
 */
#define RF_FREELIST_PRIME(_fl_,_cnt_,_nextp_,_cast_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	_i = 0; \
	while (_i < (_cnt_)) { \
		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
		if (_p == NULL) \
			break; \
		/* push the new object on the front of the free list */ \
		(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = _p; \
		(_fl_)->free_cnt++; \
		_i++; \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/* Name the freelist's own mutex, for callers that need to pass it elsewhere. */
#define RF_FREELIST_MUTEX_OF(_fl_) ((_fl_)->lock)
/*
 * Release the freelist mutex.  The *_NOUNLOCK macros in this header take
 * the mutex and return with it still held; this is how it is dropped.
 */
#define RF_FREELIST_DO_UNLOCK(_fl_) { \
RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/* Acquire the freelist mutex. */
#define RF_FREELIST_DO_LOCK(_fl_) { \
RF_LOCK_MUTEX((_fl_)->lock); \
}
/*
 * Pre-populate the arena with up to cnt freshly allocated, initialized
 * objects.
 *
 * fl = freelist
 * cnt = number to prime with
 * nextp = name of "next" pointer in obj
 * cast = object cast
 * init = func to call to init obj; a non-zero return means failure
 *
 * Priming stops early, without reporting an error, at the first failed
 * allocation or failed init.  The init function is never called on an
 * object that could not be allocated.  An object whose init fails is
 * freed.  Runs with the freelist mutex held.
 */
#define RF_FREELIST_PRIME_INIT(_fl_,_cnt_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for(_i=0;_i<(_cnt_);_i++) { \
		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
		if (_p == NULL) { \
			break; \
		} \
		if (_init_ (_cast_ _p)) { \
			RF_Free(_p,(_fl_)->obj_size); \
			_p = NULL; \
			break; \
		} \
		(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = _p; \
		(_fl_)->free_cnt++; \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Pre-populate the arena with up to cnt freshly allocated, initialized
 * objects, passing an extra argument to the init function.
 *
 * fl = freelist
 * cnt = number to prime with
 * nextp = name of "next" pointer in obj
 * cast = object cast
 * init = func to call to init obj; a non-zero return means failure
 * arg = arg to init obj func
 *
 * Priming stops early, without reporting an error, at the first failed
 * allocation or failed init.  The init function is never called on an
 * object that could not be allocated.  An object whose init fails is
 * freed.  Runs with the freelist mutex held.
 */
#define RF_FREELIST_PRIME_INIT_ARG(_fl_,_cnt_,_nextp_,_cast_,_init_,_arg_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for(_i=0;_i<(_cnt_);_i++) { \
		RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
		if (_p == NULL) { \
			break; \
		} \
		if (_init_ (_cast_ _p,_arg_)) { \
			RF_Free(_p,(_fl_)->obj_size); \
			_p = NULL; \
			break; \
		} \
		(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = _p; \
		(_fl_)->free_cnt++; \
	} \
	RF_FREELIST_STAT_FREE_UPDATE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Get one object, initializing any object that has to be freshly
 * allocated.
 *
 * fl = freelist
 * obj = object to allocate (lvalue; receives the object, or NULL on failure)
 * nextp = name of "next" pointer in obj
 * cast = cast of obj assignment
 * init = init obj func; a non-zero return means failure
 *
 * If the arena is non-empty its first object is handed out as-is (init
 * is not run on it).  Otherwise one object is allocated and initialized
 * for the caller, and up to obj_inc-1 further objects are allocated,
 * initialized and added to the arena.  obj is NULL if the caller's
 * object could not be allocated or initialized.
 *
 * NOTE(review): objects added to the arena while growing are not counted
 * in free_cnt, unlike in the RF_FREELIST_PRIME* macros -- confirm whether
 * that is intended.
 */
#define RF_FREELIST_GET_INIT(_fl_,_obj_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p) { \
						if (_init_ (_p)) { \
							RF_Free(_p,(_fl_)->obj_size); \
							_p = NULL; \
							break; \
						} \
						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
						(_fl_)->objlist = _p; \
					} \
					else { \
						break; \
					} \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	RF_FREELIST_STAT_ALLOC(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Get one object, initializing any object that has to be freshly
 * allocated with an init function that takes an extra argument.
 *
 * fl = freelist
 * obj = object to allocate (lvalue; receives the object, or NULL on failure)
 * nextp = name of "next" pointer in obj
 * cast = cast of obj assignment
 * init = init obj func; a non-zero return means failure
 * arg = arg to init obj func
 *
 * If the arena is non-empty its first object is handed out as-is (init
 * is not run on it).  Otherwise one object is allocated and initialized
 * for the caller, and up to obj_inc-1 further objects are allocated,
 * initialized and added to the arena.  obj is NULL if the caller's
 * object could not be allocated or initialized.
 *
 * NOTE(review): objects added to the arena while growing are not counted
 * in free_cnt, unlike in the RF_FREELIST_PRIME* macros -- confirm whether
 * that is intended.
 */
#define RF_FREELIST_GET_INIT_ARG(_fl_,_obj_,_nextp_,_cast_,_init_,_arg_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_,_arg_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p) { \
						if (_init_ (_p,_arg_)) { \
							RF_Free(_p,(_fl_)->obj_size); \
							_p = NULL; \
							break; \
						} \
						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
						(_fl_)->objlist = _p; \
					} \
					else { \
						break; \
					} \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	RF_FREELIST_STAT_ALLOC(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Same as RF_FREELIST_GET_INIT, except that the freelist mutex is taken
 * and NOT released: the macro returns with the mutex held and the caller
 * must drop it (see RF_FREELIST_DO_UNLOCK).
 *
 * fl = freelist
 * obj = object to allocate (lvalue; receives the object, or NULL on failure)
 * nextp = name of "next" pointer in obj
 * cast = cast of obj assignment
 * init = init obj func; a non-zero return means failure
 *
 * NOTE(review): objects added to the arena while growing are not counted
 * in free_cnt, unlike in the RF_FREELIST_PRIME* macros -- confirm whether
 * that is intended.
 */
#define RF_FREELIST_GET_INIT_NOUNLOCK(_fl_,_obj_,_nextp_,_cast_,_init_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			if (_init_ (_obj_)) { \
				RF_Free(_obj_,(_fl_)->obj_size); \
				_obj_ = NULL; \
			} \
			else { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p) { \
						if (_init_ (_p)) { \
							RF_Free(_p,(_fl_)->obj_size); \
							_p = NULL; \
							break; \
						} \
						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
						(_fl_)->objlist = _p; \
					} \
					else { \
						break; \
					} \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	RF_FREELIST_STAT_ALLOC(_fl_); \
}
/*
 * Get one object from the freelist.
 *
 * fl = freelist
 * obj = object to allocate (lvalue; receives the object, or NULL on failure)
 * nextp = name of "next" pointer in obj
 * cast = cast of obj assignment
 *
 * If the arena is non-empty its first object is handed out.  Otherwise
 * one object is allocated for the caller and up to obj_inc-1 further
 * objects are allocated and added to the arena.  obj is NULL if the
 * caller's object could not be allocated.
 *
 * NOTE(review): objects added to the arena while growing are not counted
 * in free_cnt, unlike in the RF_FREELIST_PRIME* macros -- confirm whether
 * that is intended.
 */
#define RF_FREELIST_GET(_fl_,_obj_,_nextp_,_cast_) { \
	void *_p; \
	int _i; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	if ((_fl_)->objlist) { \
		_obj_ = _cast_((_fl_)->objlist); \
		(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
		(_fl_)->free_cnt--; \
	} \
	else { \
		/* \
		 * Allocate one at a time so we can free \
		 * one at a time without cleverness when arena \
		 * is full. \
		 */ \
		RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
		if (_obj_) { \
			for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
				RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
				if (_p) { \
					(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
					(_fl_)->objlist = _p; \
				} \
				else { \
					break; \
				} \
			} \
		} \
		RF_FREELIST_STAT_GROW(_fl_); \
	} \
	RF_FREELIST_STAT_ALLOC(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Get num objects at once, returned as a chain linked through nextp.
 *
 * fl = freelist
 * obj = object to allocate (lvalue; receives the head of the chain, or
 *       NULL on failure)
 * nextp = name of "next" pointer in obj
 * cast = cast of obj assignment
 * num = num objs to return
 *
 * Objects are taken from the arena while it has any and allocated
 * otherwise (growing the arena by up to obj_inc-1 extra objects each
 * time).  On success obj heads a chain of num objects whose last element
 * has a NULL next pointer.  If an allocation fails, the objects collected
 * so far are pushed back onto the arena and obj is left NULL; a failure
 * on the very first object leaves the arena untouched.
 *
 * NOTE(review): free_cnt is not adjusted for objects added while growing
 * or for objects pushed back on failure -- confirm whether that is
 * intended.
 */
#define RF_FREELIST_GET_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
	void *_p, *_l, *_f; \
	int _i, _n; \
	_l = _f = NULL; \
	_n = 0; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	RF_ASSERT(sizeof(*(_obj_))==((_fl_)->obj_size)); \
	for(_n=0;_n<(_num_);_n++) { \
		if ((_fl_)->objlist) { \
			_obj_ = _cast_((_fl_)->objlist); \
			(_fl_)->objlist = (void *)((_obj_)->_nextp_); \
			(_fl_)->free_cnt--; \
		} \
		else { \
			/* \
			 * Allocate one at a time so we can free \
			 * one at a time without cleverness when arena \
			 * is full. \
			 */ \
			RF_Calloc(_obj_,1,(_fl_)->obj_size,_cast_); \
			if (_obj_) { \
				for(_i=1;_i<(_fl_)->obj_inc;_i++) { \
					RF_Calloc(_p,1,(_fl_)->obj_size,(void *)); \
					if (_p) { \
						(_cast_(_p))->_nextp_ = (_fl_)->objlist; \
						(_fl_)->objlist = _p; \
					} \
					else { \
						break; \
					} \
				} \
			} \
			RF_FREELIST_STAT_GROW(_fl_); \
		} \
		if (_f == NULL) \
			_f = _obj_; \
		if (_obj_) { \
			(_cast_(_obj_))->_nextp_ = _l; \
			_l = _obj_; \
			RF_FREELIST_STAT_ALLOC(_fl_); \
		} \
		else { \
			/* _f is still NULL when the very first object failed */ \
			if (_f) { \
				(_cast_(_f))->_nextp_ = (_fl_)->objlist; \
				(_fl_)->objlist = _l; \
			} \
			_n = (_num_); \
		} \
	} \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Give one object back to the freelist.
 *
 * fl = freelist
 * obj = object to free
 * nextp = name of "next" pointer in obj
 *
 * The object is parked on the arena unless free_cnt has already reached
 * max_free_cnt, in which case it is really freed.
 */
#define RF_FREELIST_FREE(_fl_,_obj_,_nextp_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the front of the arena */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = (void *)(_obj_); \
		(_fl_)->free_cnt++; \
	} \
	else { \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Give a whole chain of objects back to the freelist.
 *
 * fl = freelist
 * obj = first object of the chain to free (lvalue; NULL afterwards)
 * nextp = name of "next" pointer in obj
 * cast = object cast
 * num = num to free (debugging): asserted to equal the chain length
 *
 * Each object is parked on the arena unless free_cnt has already reached
 * max_free_cnt, in which case it is really freed.
 */
#define RF_FREELIST_FREE_N(_fl_,_obj_,_nextp_,_cast_,_num_) { \
	void *_no; \
	int _n; \
	RF_LOCK_MUTEX((_fl_)->lock); \
	for (_n = 0; (_obj_) != NULL; _n++) { \
		/* remember the successor before the object is recycled */ \
		_no = (_cast_(_obj_))->_nextp_; \
		if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
			RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
			(_obj_)->_nextp_ = (_fl_)->objlist; \
			(_fl_)->objlist = (void *)(_obj_); \
			(_fl_)->free_cnt++; \
		} \
		else { \
			RF_Free(_obj_,(_fl_)->obj_size); \
		} \
		RF_FREELIST_STAT_FREE(_fl_); \
		_obj_ = _no; \
	} \
	RF_ASSERT(_n==(_num_)); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Give back one object that was set up by an init function.
 *
 * fl = freelist
 * obj = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 *
 * The object is parked on the arena, still initialized, unless free_cnt
 * has already reached max_free_cnt; only then is clean called on it and
 * the object really freed.
 */
#define RF_FREELIST_FREE_CLEAN(_fl_,_obj_,_nextp_,_clean_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the front of the arena */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = (void *)(_obj_); \
		(_fl_)->free_cnt++; \
	} \
	else { \
		_clean_ (_obj_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Give back one object that was set up by an init function whose undo
 * function takes an extra argument.
 *
 * fl = freelist
 * obj = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 * arg = arg for undo func
 *
 * The object is parked on the arena, still initialized, unless free_cnt
 * has already reached max_free_cnt; only then is clean called on it and
 * the object really freed.
 */
#define RF_FREELIST_FREE_CLEAN_ARG(_fl_,_obj_,_nextp_,_clean_,_arg_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the front of the arena */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = (void *)(_obj_); \
		(_fl_)->free_cnt++; \
	} \
	else { \
		_clean_ (_obj_,_arg_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
	RF_UNLOCK_MUTEX((_fl_)->lock); \
}
/*
 * Same as RF_FREELIST_FREE_CLEAN, except that the freelist mutex is taken
 * and NOT released: the macro returns with the mutex held and the caller
 * must drop it (see RF_FREELIST_DO_UNLOCK).
 *
 * fl = freelist
 * obj = object to free
 * nextp = name of "next" pointer in obj
 * clean = undo for init
 */
#define RF_FREELIST_FREE_CLEAN_NOUNLOCK(_fl_,_obj_,_nextp_,_clean_) { \
	RF_LOCK_MUTEX((_fl_)->lock); \
	if ((_fl_)->free_cnt != (_fl_)->max_free_cnt) { \
		/* room left: push the object on the front of the arena */ \
		RF_ASSERT((_fl_)->free_cnt < (_fl_)->max_free_cnt); \
		(_obj_)->_nextp_ = (_fl_)->objlist; \
		(_fl_)->objlist = (void *)(_obj_); \
		(_fl_)->free_cnt++; \
	} \
	else { \
		_clean_ (_obj_); \
		RF_Free(_obj_,(_fl_)->obj_size); \
	} \
	RF_FREELIST_STAT_FREE(_fl_); \
}
/*
 * Tear down a freelist: report statistics (when enabled), destroy the
 * mutex, free every object still on the arena, then free the header.
 * fl itself is not modified and must not be used afterwards.
 *
 * fl = freelist
 * nextp = name of "next" pointer in obj
 * cast = cast to object type
 */
#define RF_FREELIST_DESTROY(_fl_,_nextp_,_cast_) { \
	void *_cur, *_next; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_cur = (_fl_)->objlist; \
	while (_cur != NULL) { \
		/* fetch the successor before the object goes away */ \
		_next = (_cast_ _cur)->_nextp_; \
		RF_Free(_cur,(_fl_)->obj_size); \
		_cur = _next; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
/*
 * Tear down a freelist of initialized objects: report statistics (when
 * enabled), destroy the mutex, call clean on and free every object still
 * on the arena, then free the header.  fl itself is not modified and
 * must not be used afterwards.
 *
 * fl = freelist
 * nextp = name of "next" pointer in obj
 * cast = cast to object type
 * clean = func to undo obj init
 */
#define RF_FREELIST_DESTROY_CLEAN(_fl_,_nextp_,_cast_,_clean_) { \
	void *_cur, *_next; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_cur = (_fl_)->objlist; \
	while (_cur != NULL) { \
		/* fetch the successor before the object goes away */ \
		_next = (_cast_ _cur)->_nextp_; \
		_clean_ (_cur); \
		RF_Free(_cur,(_fl_)->obj_size); \
		_cur = _next; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
/*
 * Tear down a freelist of initialized objects whose undo function takes
 * an extra argument: report statistics (when enabled), destroy the
 * mutex, call clean on and free every object still on the arena, then
 * free the header.  fl itself is not modified and must not be used
 * afterwards.
 *
 * fl = freelist
 * nextp = name of "next" pointer in obj
 * cast = cast to object type
 * clean = func to undo obj init
 * arg = arg for undo func
 */
#define RF_FREELIST_DESTROY_CLEAN_ARG(_fl_,_nextp_,_cast_,_clean_,_arg_) { \
	void *_cur, *_next; \
	RF_FREELIST_STAT_REPORT(_fl_); \
	rf_mutex_destroy(&((_fl_)->lock)); \
	_cur = (_fl_)->objlist; \
	while (_cur != NULL) { \
		/* fetch the successor before the object goes away */ \
		_next = (_cast_ _cur)->_nextp_; \
		_clean_ (_cur,_arg_); \
		RF_Free(_cur,(_fl_)->obj_size); \
		_cur = _next; \
	} \
	RF_Free(_fl_,sizeof(RF_FreeList_t)); \
}
#endif /* !_RF__RF_FREELIST_H_ */

View File

@ -0,0 +1,268 @@
/* $NetBSD: rf_general.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_general.h -- some general-use definitions
*/
/*
* :
* Log: rf_general.h,v
* Revision 1.26 1996/08/09 16:44:57 jimz
* sunos port
*
* Revision 1.25 1996/08/07 21:08:57 jimz
* get NBPG defined for IRIX
*
* Revision 1.24 1996/08/06 22:02:06 jimz
* include linux/user.h for linux to get NBPG
*
* Revision 1.23 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.22 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.21 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.20 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.19 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.18 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.17 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.16 1996/05/21 18:53:13 jimz
* be sure that noop macros don't confuse conditionals and loops
*
* Revision 1.15 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.14 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.13 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.12 1995/12/01 18:29:08 root
* added copyright info
*
* Revision 1.11 1995/09/19 22:59:52 jimz
* Add kernel macro RF_DKU_END_IO(). When DKUSAGE is not defined,
* this is a no-op. When it is defined, it calls dku_end_io()
* correctly given a raidframe unit number and a buf pointer.
*
* Revision 1.10 1995/07/03 18:13:56 holland
* changed kernel defn of GETTIME
*
* Revision 1.9 1995/07/02 15:07:42 holland
* bug fixes related to getting distributed sparing numbers
*
* Revision 1.8 1995/06/12 15:54:40 rachad
* Added garbage collection for log structured storage
*
* Revision 1.7 1995/06/03 19:18:16 holland
* changes related to kernelization: access traces
* changes related to distributed sparing: some bug fixes
*
* Revision 1.6 1995/05/01 13:28:00 holland
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
*
* Revision 1.5 1995/04/06 14:47:56 rachad
* merge completed
*
* Revision 1.4 1995/03/15 20:45:23 holland
* distr sparing changes.
*
* Revision 1.3 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.2 1994/11/29 21:37:10 danner
* Added divide by zero check.
*
*/
/*#define NOASSERT*/
#ifndef _RF__RF_GENERAL_H_
#define _RF__RF_GENERAL_H_
#ifdef _KERNEL
#define KERNEL
#endif
#if !defined(KERNEL) && !defined(NOASSERT)
#include <assert.h>
#endif /* !KERNEL && !NOASSERT */
/*
 * error reporting and handling
 *
 * RF_ERRORMSG*(fmt, ...) print a printf-style message: to stderr at user
 * level, with the kernel printf otherwise.
 * RF_ASSERT(x) stops the program/kernel when x is false, unless NOASSERT
 * is defined, in which case it expands to an empty block.
 * RF_PANIC() stops unconditionally, reporting file and line.
 */
#ifndef KERNEL
#define RF_ERRORMSG(s) fprintf(stderr,(s))
#define RF_ERRORMSG1(s,a) fprintf(stderr,(s),(a))
#define RF_ERRORMSG2(s,a,b) fprintf(stderr,(s),(a),(b))
#define RF_ERRORMSG3(s,a,b,c) fprintf(stderr,(s),(a),(b),(c))
#define RF_ERRORMSG4(s,a,b,c,d) fprintf(stderr,(s),(a),(b),(c),(d))
#define RF_ERRORMSG5(s,a,b,c,d,e) fprintf(stderr,(s),(a),(b),(c),(d),(e))
#ifndef NOASSERT
#define RF_ASSERT(x) {assert(x);}
#else /* !NOASSERT */
#define RF_ASSERT(x) {/*noop*/}
#endif /* !NOASSERT */
#define RF_PANIC() {printf("YIKES! Something terrible happened at line %d of file %s. Use a debugger.\n",__LINE__,__FILE__); abort();}
#else /* !KERNEL */
#if defined(__NetBSD__) && defined(_KERNEL)
#include<sys/systm.h> /* printf, sprintf, and friends */
#endif
#define RF_ERRORMSG(s) printf((s))
#define RF_ERRORMSG1(s,a) printf((s),(a))
#define RF_ERRORMSG2(s,a,b) printf((s),(a),(b))
#define RF_ERRORMSG3(s,a,b,c) printf((s),(a),(b),(c))
#define RF_ERRORMSG4(s,a,b,c,d) printf((s),(a),(b),(c),(d))
#define RF_ERRORMSG5(s,a,b,c,d,e) printf((s),(a),(b),(c),(d),(e))
/* perror() calls in shared code compile away to nothing in the kernel */
#define perror(x)
/* buffer the panic message is formatted into before panic() is called */
extern char rf_panicbuf[];
/*
 * Hand the already formatted rf_panicbuf to panic().  The NetBSD panic()
 * takes a printf-style format, so the buffer must be passed as data: it
 * can contain '%' (for instance from a stringified assertion such as
 * "a % b == 0") which must not be interpreted a second time.
 * NOTE(review): other kernels keep the original single-string call;
 * confirm their panic() prototype before changing that branch.
 */
#if defined(__NetBSD__)
#define RF_PANIC_BUF() panic("%s", rf_panicbuf)
#else /* __NetBSD__ */
#define RF_PANIC_BUF() panic(rf_panicbuf)
#endif /* __NetBSD__ */
#define RF_PANIC() {sprintf(rf_panicbuf,"raidframe error at line %d file %s",__LINE__,__FILE__); RF_PANIC_BUF();}
#ifdef RF_ASSERT
#undef RF_ASSERT
#endif /* RF_ASSERT */
#ifndef NOASSERT
#define RF_ASSERT(_x_) { \
	if (!(_x_)) { \
		sprintf(rf_panicbuf, \
		    "raidframe error at line %d file %s (failed asserting %s)\n", \
		    __LINE__, __FILE__, #_x_); \
		RF_PANIC_BUF(); \
	} \
}
#else /* !NOASSERT */
#define RF_ASSERT(x) {/*noop*/}
#endif /* !NOASSERT */
#endif /* !KERNEL */
/* random stuff */
/*
 * Larger / smaller of two values.  Each argument appears twice in the
 * expansion, so do not pass expressions with side effects.
 */
#define RF_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define RF_MIN(a,b) (((a) < (b)) ? (a) : (b))
/* divide-by-zero check: yields a/b, or 0 when b is 0 (b is evaluated twice) */
#define RF_DB0_CHECK(a,b) ( ((b)==0) ? 0 : (a)/(b) )
/*
 * get time of day
 *
 * RF_GETTIME(_t) stores the current time in _t (an lvalue whose address
 * is passed to microtime() in the kernel, gettimeofday() at user level).
 */
#ifdef KERNEL
#ifndef __NetBSD__
extern struct timeval time;
#endif /* !__NetBSD__ */
#define RF_GETTIME(_t) microtime(&(_t))
#else /* KERNEL */
/*
 * NOTE: unlike the kernel version, this expansion carries its own
 * trailing semicolon, so "RF_GETTIME(t);" yields an extra empty statement
 * at user level.  Do not use it as the unbraced body of an if that has
 * an else.
 */
#define RF_GETTIME(_t) gettimeofday(&(_t), NULL);
#endif /* KERNEL */
/*
 * zero memory- not all bzero calls go through here, only
 * those which in the kernel may have a user address
 *
 * RF_BZERO(_bp,_b,_l) zeroes _l bytes starting at _b.  _bp is only used
 * by the non-NetBSD kernel variant, which passes it to
 * rf_BzeroWithRemap() when _b is not a system virtual address.
 */
#ifdef KERNEL
#ifndef __NetBSD__
/*
 * NOTE: this expansion is a bare if/else statement; enclose the macro
 * call in braces when it is the body of another if.
 */
#define RF_BZERO(_bp,_b,_l) if (IS_SYS_VA(_b)) bzero(_b,_l); else rf_BzeroWithRemap(_bp,_b,_l)
#else
#define RF_BZERO(_bp,_b,_l) bzero(_b,_l) /* XXX This is likely incorrect. GO*/
#endif /* __NetBSD__ */
#else /* KERNEL */
#define RF_BZERO(_bp,_b,_l) bzero(_b,_l)
#endif /* KERNEL */
/*
 * Page-size helpers.  NBPG (bytes per page) is taken from the platform
 * headers; the blocks below supply it where it is not already defined.
 */
#ifdef sun
#include <sys/param.h>
#ifndef NBPG
#define NBPG PAGESIZE
#endif /* !NBPG */
#endif /* sun */
#ifdef IRIX
#include <sys/tfp.h>
#define NBPG _PAGESZ
#endif /* IRIX */
#ifdef LINUX
#include <linux/user.h>
#endif /* LINUX */
/* convert any scalar or pointer to unsigned long */
#define RF_UL(x) ((unsigned long) (x))
/* mask selecting the offset-within-page bits (assumes NBPG is a power of two -- TODO confirm) */
#define RF_PGMASK RF_UL(NBPG-1)
/* note: yields NBPG, not 0, when x is already page aligned */
#define RF_BLIP(x) (NBPG - (RF_UL(x) & RF_PGMASK)) /* bytes left in page */
/* non-zero when x has no offset-within-page bits set */
#define RF_PAGE_ALIGNED(x) ((RF_UL(x) & RF_PGMASK) == 0)
#ifdef KERNEL
#ifndef __NetBSD__
#include <dkusage.h>
#endif
#if DKUSAGE > 0
/*
 * Report the completion of an I/O to the disk usage accounting code.
 *
 * _unit_ = raidframe unit number
 * _bp_ = buf pointer of the completed I/O; its b_flags select the
 *        direction reported and its b_bcount the byte count
 *
 * The direction test is explicitly parenthesized: "==" binds tighter
 * than "&", so without the inner parentheses b_flags would be masked
 * with the result of the comparison instead of being compared.
 */
#define RF_DKU_END_IO(_unit_,_bp_) { \
	int s = splbio(); \
	dku_end_io(DKU_RAIDFRAME_BUS, _unit_, 0, \
	    ((((_bp_)->b_flags & (B_READ|B_WRITE)) == B_READ) ? \
	    CAM_DIR_IN : CAM_DIR_OUT), \
	    (_bp_)->b_bcount); \
	splx(s); \
}
#else /* DKUSAGE > 0 */
/*
 * NOTE(review): this no-op takes one parameter while the real macro above
 * takes two; confirm how callers invoke it before changing the arity.
 */
#define RF_DKU_END_IO(unit) { /* noop */ }
#endif /* DKUSAGE > 0 */
#endif /* KERNEL */
/*
 * RF_STRING(x) turns its argument into a string literal.  ISO C compilers
 * use the # operator.  NOTE(review): the fallback presumably relies on
 * pre-ISO preprocessors substituting macro parameters inside string
 * literals; on any other preprocessor it yields the literal "_str_".
 */
#ifdef __STDC__
#define RF_STRING(_str_) #_str_
#else /* __STDC__ */
#define RF_STRING(_str_) "_str_"
#endif /* __STDC__ */
#endif /* !_RF__RF_GENERAL_H_ */

View File

@ -0,0 +1,199 @@
/* $NetBSD: rf_geniq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_geniq.c
* code which implements Reed-Solomon encoding for RAID level 6
*/
/* :
* Log: rf_geniq.c,v
* Revision 1.12 1996/07/29 16:37:00 jimz
* remove archs.h include to avoid VPATH problems in kernel
* rf_invertq.c now must include archs.h before invertq.h
*
* Revision 1.11 1996/07/29 15:04:16 jimz
* correct rf_archs.h path for kernel
*
* Revision 1.10 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.9 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.8 1996/07/15 17:22:18 jimz
* nit-pick code cleanup
* resolve stdlib problems on DEC OSF
*
* Revision 1.7 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.6 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.5 1995/12/01 18:29:18 root
* added copyright info
*
*/
#define RF_UTILITY 1
#include "rf_pqdeg.h"
/*
 * Advance a five-bit linear feedback shift register by one step.
 *
 *   val  - current register contents (bits 0..4 are used)
 *   poly - feedback connections; bit n set means the bit shifted out of
 *          the top (bit 4) is XORed into position n
 *
 * Returns the new register contents.
 */
int lsfr_shift(val,poly)
	unsigned val, poly;
{
	unsigned msb = (val >> 4) & 1;	/* bit leaving the register */
	unsigned next = 0;
	unsigned pos;

	/* position 0 has no lower neighbour: it only ever receives feedback */
	if (poly & 1)
		next = msb;

	for (pos = 1; pos <= 4; pos++) {
		unsigned src = (val >> (pos - 1)) & 1;	/* bit moving up into pos */
		unsigned tap = (poly >> pos) & 1;	/* feedback wired here? */

		next |= (src ^ (tap & msb)) << pos;
	}
	return next;
}
/* generate Q matrices for the data */
RF_ua32_t rf_qfor[32];

/*
 * Write the generated header rf_invertq.h to stdout.
 *
 * The output contains, in order:
 *   - rf_rn:   32 successive LFSR states starting from 1 (polynomial 5)
 *   - rf_qfor: row i maps each 5-bit value j to j shifted i times
 *   - rf_qinv: the 29x29 inverse tables (emitted only for the
 *              !KERNEL && !NO_PQ branch of the generated file)
 *
 * Diagnostics go to stderr so that they cannot end up inside the
 * generated source.  Returns 0.
 */
int main()
{
	unsigned int i, j, l, a, b;
	unsigned int val;
	unsigned int p, q;
	RF_ua32_t k;

	printf("/*\n");
	printf(" * rf_invertq.h\n");
	printf(" */\n");
	printf("/*\n");
	printf(" * GENERATED FILE -- DO NOT EDIT\n");
	printf(" */\n");
	printf("\n");
	printf("#ifndef _RF__RF_INVERTQ_H_\n");
	printf("#define _RF__RF_INVERTQ_H_\n");
	printf("\n");
	printf("/*\n");
	printf(" * rf_geniq.c must include rf_archs.h before including\n");
	printf(" * this file (to get VPATH magic right with the way we\n");
	printf(" * generate this file in kernel trees)\n");
	printf(" */\n");
	printf("/* #include \"rf_archs.h\" */\n");
	printf("\n");
	printf("#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)\n");
	printf("\n");
	printf("#define RF_Q_COLS 32\n");

	/* k[] holds the values emitted as rf_rn; it is reused by the solver below */
	printf("RF_ua32_t rf_rn = {\n");
	k[0] = 1;
	for (j = 0; j < 31; j++)
		k[j+1] = lsfr_shift(k[j], 5);
	for (j = 0; j < 32; j++)
		printf("%d, ", k[j]);
	printf("};\n");

	printf("RF_ua32_t rf_qfor[32] = {\n");
	for (i = 0; i < 32; i++) {
		printf("/* i = %d */ { 0, ", i);
		rf_qfor[i][0] = 0;
		for (j = 1; j < 32; j++) {
			val = j;
			for (l = 0; l < i; l++)
				val = lsfr_shift(val, 5);
			rf_qfor[i][j] = val;
			printf("%d, ", val);
		}
		printf("},\n");
	}
	printf("};\n");
	printf("#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]\n");

	/* generate the inverse tables. (i,j,p,q) */
	/* The table just stores a. Get b back from the parity */
	printf("#ifdef KERNEL\n");
	printf("RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */\n");
	printf("#elif defined(NO_PQ)\n");
	printf("RF_ua1024_t rf_qinv[29*29];\n");
	printf("#else /* !KERNEL && NO_PQ */\n");
	printf("RF_ua1024_t rf_qinv[29*29] = {\n");
	for (i = 0; i < 29; i++) {
		for (j = 0; j < 29; j++) {
			printf("/* i %d, j %d */{ ", i, j);
			if (i == j) {
				/* 1023 explicit zeros; the compiler zero-fills the last entry */
				for (l = 0; l < 1023; l++)
					printf("0, ");
			} else {
				for (p = 0; p < 32; p++) {
					for (q = 0; q < 32; q++) {
						/*
						 * What are a, b such that
						 *   a ^ b = p; and
						 *   qfor[(28-i)][a ^ rf_rn[i+1]] ^
						 *   qfor[(28-j)][b ^ rf_rn[j+1]] = q.
						 * Solve by guessing a.  Then testing.
						 */
						for (a = 0; a < 32; a++) {
							b = a ^ p;
							if ((rf_qfor[28-i][a ^ k[i+1]] ^ rf_qfor[28-j][b ^ k[j+1]]) == q)
								break;
						}
						/* keep the diagnostic out of the generated header */
						if (a == 32)
							fprintf(stderr, "unable to solve %d %d %d %d\n", i, j, p, q);
						printf("%d,", a);
					}
				}
			}
			printf("},\n");
		}
	}
	printf("};\n");
	/*
	 * The first #endif closes the KERNEL/NO_PQ chain, the second closes
	 * the RF_INCLUDE_PQ test; the trailing comments follow that nesting.
	 */
	printf("\n#endif /* !KERNEL && NO_PQ */\n\n");
	printf("#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */\n");
	printf("#endif /* !_RF__RF_INVERTQ_H_ */\n");
	return 0;
}

View File

@ -0,0 +1,890 @@
/* $NetBSD: rf_geometry.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Changes:
* 10/24/91 Changes to support disk bus contention model
* (MCH) 1. Added media_done_time param to Access_time()
*
* 08/18/92 Geometry routines have been modified to support zone-bit
* recording.
* (AS) 1. Each routine which originally referenced the variable
* 'disk->geom->sectors_per_track' has been modified,
* since the number of sectors per track varies on disks
* with zone-bit recording.
*/
/* :
* Log: rf_geometry.c,v
* Revision 1.18 1996/08/11 00:40:57 jimz
* fix up broken comment
*
* Revision 1.17 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.16 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.15 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.14 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.13 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.12 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.11 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.10 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.9 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.8 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.7 1995/12/01 18:29:34 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_geometry.h"
#include "rf_raid.h"
#include "rf_general.h"
#include "rf_debugMem.h"
/* NOTE(review): presumably the default database file and disk model names -- confirm */
#define DISK_DB "disk_db"
#define DISK_NAME "HP2247"

/* absolute difference; arguments may be evaluated twice */
#define ABS_DIFF(a,b) ( ((b)<(a)) ? ((a)-(b)) : ((b)-(a)) )

/* parsed geometry database, loaded by the first rf_InitDisk() call */
static RF_GeometryList_t *geom_list = NULL;

/* overwritten by Update_stats() with the spin-up time it last charged */
RF_TICS_t rf_globalSpinup = 1.5;
/* size in bytes of a disk-name buffer, terminating NUL included */
#define NM_LGTH 80
/*
 * scanf pattern that reads a disk name into a char[NM_LGTH] buffer.
 * A %Ns conversion stores up to N characters PLUS a NUL, so the field
 * width must be NM_LGTH-1; keep the two definitions in step.
 */
#define NM_PATN " %79s"
/*
 * Prototypes for the file-local helpers.  The definitions further down
 * are written in old-style (K&R) form, so these declarations are what
 * give callers in this file type-checked argument lists.
 */
/* disk database parsing and per-disk setup */
static RF_GeometryList_t *Fetch_geometry_db(FILE *fd);
static void Format_disk(RF_DiskState_t *disk, long sectors_per_block);
/* block number -> cylinder / track / sector-on-track */
static long Find_cyl(RF_SectorNum_t block, RF_DiskState_t *disk);
static long Find_track(RF_SectorNum_t block, RF_DiskState_t *disk);
static long Find_phys_sector(RF_SectorNum_t block, RF_DiskState_t *disk);
/* timing of head movement, rotation and transfer */
static RF_TICS_t Delay_to(RF_TICS_t cur_time, RF_SectorNum_t block,
RF_DiskState_t *disk);
static RF_TICS_t Seek_time(long to_cyl, long to_track, long from_cyl,
long from_track, RF_DiskState_t *disk);
static RF_TICS_t Seek(RF_TICS_t cur_time, RF_SectorNum_t block,
RF_DiskState_t *disk, long update);
static RF_TICS_t Rotate(RF_TICS_t cur_time, RF_SectorNum_t block,
RF_DiskState_t *disk, long update);
static RF_TICS_t Seek_Rotate(RF_TICS_t cur_time, RF_SectorNum_t block,
RF_DiskState_t *disk, long update);
static RF_TICS_t GAP(long sec_per_track, RF_DiskState_t *disk);
static RF_TICS_t Block_access_time(RF_TICS_t cur_time, RF_SectorNum_t block,
RF_SectorCount_t numblocks, RF_DiskState_t *disk, long update);
/* per-disk statistics */
static void Zero_stats(RF_DiskState_t *disk);
static RF_TICS_t Update_stats(RF_TICS_t cur_time, RF_TICS_t seek, RF_TICS_t rotate,
RF_TICS_t transfer, RF_DiskState_t *disk);
/* seek-curve coefficients from the min/avg/max seek figures */
static void rf_DiskParam(long numCyls, RF_TICS_t minSeek, RF_TICS_t avgSeek, RF_TICS_t maxSeek,
RF_TICS_t *a, RF_TICS_t *b, RF_TICS_t *c);
static RF_GeometryList_t *Fetch_geometry_db(fd)
FILE *fd;
{
long ret, lineno;
char name[NM_LGTH], title[20];
RF_GeometryList_t * list = (RF_GeometryList_t *) NULL,
** next_ptr = & list;
if( RF_MAX_DISKNAME_LEN<NM_LGTH ) RF_PANIC();
lineno = 0;
while( (ret = fscanf( fd, " %20s", title )) != EOF ) {
float tmp_f1, tmp_f2, tmp_f3, tmp_f4;
float tmp_f5=0.0;
float tmp_f6=0.0;
RF_Geometry_t *g;
long i, x, y, z, num_cylinders;
RF_ZoneList_t ** znext_ptr;
if( ret == 1 && strncmp( "enddisk", title, 8 ) == 0 ) break;
RF_Calloc(*next_ptr, 1, sizeof(RF_GeometryList_t), (RF_GeometryList_t *));
(*next_ptr)->next = (RF_GeometryList_t *) NULL;
RF_Calloc(g, 1, sizeof(RF_Geometry_t), (RF_Geometry_t *));
(*next_ptr)->disk = g;
next_ptr = &( (*next_ptr)->next ); /*prep for next iteration */
lineno++;
if (fscanf( fd, NM_PATN, name ) != 1) {
fprintf(stderr,"Disk DB Error: Can't get disk name from disk db\n");
fprintf(stderr,"lineno=%d\n", lineno);
fprintf(stderr,"name=\"%s\"\n", name);
exit(1);
}
lineno++;
if ( (fscanf(fd, " tracks per cylinder %ld", &(g->tracks_per_cyl)) != 1) || g->tracks_per_cyl <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid tracks/cyl for disk %s\n", name); exit(1);
}
lineno++;
if ( (fscanf(fd, " number of disk zones %ld", &(g->num_zones)) != 1) || g->num_zones <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid number of zones for disk %s\n", name); exit(1);
}
/* This section of code creates the linked list which
contains the disk's zone information. */
g->zbr_data = (RF_ZoneList_t *) NULL;
znext_ptr = &(g->zbr_data);
num_cylinders = 0;
/* This for-loop reads in the cylinder count, the sectors
per track, and track skew for each zone on the disk. */
for (i=1; i <= g->num_zones; i++) {
lineno++;
if ( (fscanf(fd, " number of cylinders in zone %ld", &x) != 1) || x < 1) {
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid cyls/zone for disk %s\n", i, name); exit(1);
}
lineno++;
if ( (fscanf(fd, " sectors per track in zone %ld", &y) != 1) || y < 1 ) {
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid sectors/track for disk %s\n", i, name); exit(1);
}
lineno++;
if ( (fscanf(fd, " track skew in zone %ld", &z) != 1) || z < 0 ) {
fprintf(stderr,"Disk DB Error: Zone %ld: Missing or invalid track skew for disk %s\n",i, name); exit(1);
}
RF_Calloc(*znext_ptr, 1, sizeof(RF_ZoneList_t), (RF_ZoneList_t *));
(*znext_ptr)->next = (RF_ZoneList_t *) NULL;
(*znext_ptr)->zone.num_cylinders = x;
(*znext_ptr)->zone.sec_per_track = y;
(*znext_ptr)->zone.track_skew = z;
(*znext_ptr)->zone.num_sectors =
(*znext_ptr)->zone.num_cylinders *
g->tracks_per_cyl *
(*znext_ptr)->zone.sec_per_track;
znext_ptr = &((*znext_ptr)->next);
num_cylinders = num_cylinders + x;
} /* End of for-loop */
lineno++;
if ( (fscanf(fd, " revolution time %f", &tmp_f1) != 1) || tmp_f1 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid revolution time for disk %s\n",name); exit(1);
}
lineno++;
if ( (fscanf(fd, " 1 cylinder seek time %f", &tmp_f2 ) != 1) || tmp_f2 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid 1-cyl seek time for disk %s\n",name); exit(1);
}
lineno++;
if ( (fscanf(fd, " max stroke seek time %f", &tmp_f3) != 1) || tmp_f3 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid max seek time for disk %s\n",name); exit(1);
}
lineno++;
if ( (fscanf(fd, " average seek time %f", &tmp_f4) != 1) || tmp_f4 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid avg seek time for disk %s\n",name); exit(1);
}
lineno++;
if ( (fscanf(fd, " time to sleep %f", &tmp_f5) != 1) || tmp_f4 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid time to sleep for disk %s\n",name); exit(1);
}
lineno++;
if ( (fscanf(fd, " time to spinup %f", &tmp_f6) != 1) || tmp_f4 <= 0) {
fprintf(stderr,"Disk DB Error: Missing or invalid time to sleep for disk %s\n",name); exit(1);
}
strcpy( g->disk_name, name );
g->revolution_time = tmp_f1;
g->seek_one_cyl = tmp_f2;
g->seek_max_stroke = tmp_f3;
g->seek_avg = tmp_f4;
g->time_to_sleep = tmp_f5;
g->time_to_spinup = tmp_f6;
/* convert disk specs to seek equation coeff */
rf_DiskParam( num_cylinders, g->seek_one_cyl,
g->seek_avg, g->seek_max_stroke,
&g->seek_sqrt_coeff, &g->seek_linear_coeff,
&g->seek_constant_coeff );
}
return( list );
}
static void Format_disk(disk, sectors_per_block)
RF_DiskState_t *disk;
long sectors_per_block;
{
long sector_count = 0;
RF_ZoneList_t *z;
if( disk == (RF_DiskState_t *) NULL ) RF_PANIC();
if( disk->geom == (RF_Geometry_t *) NULL ) RF_PANIC();
if( sectors_per_block <=0 ) RF_PANIC();
disk->sectors_per_block = sectors_per_block;
z = disk->geom->zbr_data;
/* This while-loop visits each disk zone and computes the total
number of sectors on the disk. */
while (z != (RF_ZoneList_t *) NULL) {
sector_count = sector_count + (z->zone.num_cylinders *
disk->geom->tracks_per_cyl *
z->zone.sec_per_track);
z = z->next;
}
disk->last_block_index = (sector_count / sectors_per_block) - 1;
}
void rf_InitDisk( disk, disk_db, disk_name, init_cyl, init_track, init_offset, row, col)
RF_DiskState_t *disk;
char *disk_db;
char *disk_name;
long init_cyl;
long init_track;
RF_TICS_t init_offset;
int row;
int col;
{
RF_GeometryList_t *gp;
FILE *f;
RF_ASSERT( disk != (RF_DiskState_t *) NULL );
disk->cur_cyl = init_cyl;
disk->cur_track = init_track;
disk->index_offset = init_offset;
disk->geom = (RF_Geometry_t *) NULL;
disk->queueFinishTime = 0.0;
disk->lastBlock = 0;
disk->row=row;
disk->col=col;
Zero_stats(disk);
if (strncmp(disk_name,"/dev",4 )==0) strcpy(disk_name,"HP2247");
if( geom_list == (RF_GeometryList_t *) NULL ) {
f = fopen(disk_db,"r");
if (f == NULL) {
fprintf(stderr, "ERROR: RAIDframe could not open disk db %s\n", disk_db);
exit(1);
}
geom_list = Fetch_geometry_db( f );
fclose( f );
}
for( gp = geom_list; gp != (RF_GeometryList_t *) NULL; gp = gp->next ) {
RF_ASSERT( gp->disk != (RF_Geometry_t *) NULL
&& gp->disk->disk_name != (char *) NULL );
if( strncmp( disk_name, gp->disk->disk_name, RF_MAX_DISKNAME_LEN )
== 0 ) {
disk->geom = gp->disk;
break;
}
}
if( disk->geom == (RF_Geometry_t *) NULL ) {
fprintf( stderr, "Disk %s not found in database %s\n",
disk_name, disk_db );
exit(1);
}
Format_disk( disk, 1 );
}
/*
 * Return the cylinder number that holds the first sector of `block'.
 * The block must lie on the disk: the zone walk does not check for the
 * end of the zone list.
 */
static long Find_cyl( block, disk )
	RF_SectorNum_t block;
	RF_DiskState_t *disk;
{
	RF_ZoneList_t *zp;
	long offset = block * disk->sectors_per_block;	/* sector, then offset in zone */
	long zone_first_cyl = 0;

	/* skip whole zones, tracking where the current zone's cylinders start */
	for (zp = disk->geom->zbr_data; offset >= zp->zone.num_sectors; zp = zp->next) {
		offset -= zp->zone.num_sectors;
		zone_first_cyl += zp->zone.num_cylinders;
	}

	/* whole cylinders covered by the offset into this zone */
	return zone_first_cyl +
	    offset / (zp->zone.sec_per_track * disk->geom->tracks_per_cyl);
}
/*
 * Return the track within its cylinder (0 .. tracks_per_cyl-1) that
 * holds the first sector of `block'.
 * The block must lie on the disk: the zone walk does not check for the
 * end of the zone list.
 */
static long Find_track( block, disk )
	RF_SectorNum_t block;
	RF_DiskState_t *disk;
{
	RF_ZoneList_t *zp;
	long offset = block * disk->sectors_per_block;	/* sector, then offset in zone */
	long zone_first_track = 0;

	/* skip whole zones, counting the tracks they contain */
	for (zp = disk->geom->zbr_data; offset >= zp->zone.num_sectors; zp = zp->next) {
		offset -= zp->zone.num_sectors;
		zone_first_track += zp->zone.num_cylinders *
		    disk->geom->tracks_per_cyl;
	}

	/* absolute track number, folded back into a single cylinder */
	return (zone_first_track + offset / zp->zone.sec_per_track) %
	    disk->geom->tracks_per_cyl;
}
/*
** The position of a logical sector relative to the index mark on any track
** is not simple. A simple organization would be:
**
** track 0 : 0, 1, 2, 3, ... N-1
** track 1 : N,N+1,N+2,N+3, ... 2N-1
** ^
** Index mark just before this point
**
** This is not good because sequential access of sectors N-1 then N
** will require a full revolution in between (because track switch requires
** a couple of sectors to recalibrate from embedded servo). So frequently
** sequentially numbered sectors are physically skewed so that the next
** accessible sector after N-1 will be N (with a skew of 2)
**
** track 0 : 0, 1, 2, 3, ... N-1
** track 1 : 2N-2,2N-1, N, N+1, ... 2N-3
** ^
** Index mark just before this point
**
** Layout gets even more complex with cylinder boundaries. Seek time
** is A + B*M where M is the number of cylinders to seek over. On a sequential
** access that crosses a cylinder boundary, the disk will rotate for
** A+B seconds, then "track skew" sectors (inter-sector gaps actually)
** before it can access another sector, so the cylinder to cylinder skew
** is "track skew" + CEIL( sectors_per_track*(A+B)/revolution_time ).
**
** So if sector 0 is 0 sectors from the index mark on the first track,
** where is sector X relative to the index mark on its track?
**
** ( ( X % sectors_per_track ) basic relative position **
** + track_skew * ( X / sectors_per_track ) skewed for each track **
** + CEIL( sectors_per_track*(A+B)/revolution_time )
** * ( X / sectors_per_cylinder ) skewed more for each cyl **
** ) % sectors_per_track wrapped around in the track **
**
**
*/
/*
 * Return the position, in sectors past the index mark of its track, of
 * the first sector of `block' (a value in 0 .. sec_per_track-1 of the
 * block's zone).
 * The block must lie on the disk: the zone walk does not check for the
 * end of the zone list.
 */
static long Find_phys_sector(block, disk)
RF_SectorNum_t block;
RF_DiskState_t *disk;
{
long phys = 0;
RF_ZoneList_t * z;
/* sectors/track of the zone `phys' is currently expressed in; 1 before any zone */
long previous_spt = 1;
long sector = block * disk->sectors_per_block;
z = disk->geom->zbr_data;
/* This while-loop finds the zone to which sector belongs,
and computes the physical sector up to that zone. */
while (sector >= z->zone.num_sectors) {
sector = sector - z->zone.num_sectors;
/* By first multiplying 'phys' by the sectors per track in
the current zone divided by the sectors per track in the
previous zone, we convert a given physical sector in one
zone to an equivalent physical sector in another zone. */
/* Terms added below, for the whole zone being skipped: position of the
zone's last sector on its track, track skew for every track in the
zone, and one cylinder skew per cylinder.  The cylinder skew is
computed from seek_constant_coeff alone. */
phys = ((phys * z->zone.sec_per_track / previous_spt) +
(((z->zone.num_sectors - 1) % z->zone.sec_per_track) +
(z->zone.track_skew * z->zone.num_cylinders *
disk->geom->tracks_per_cyl) +
(long) ceil( (double) z->zone.sec_per_track *
(disk->geom->seek_constant_coeff) /
disk->geom->revolution_time) *
z->zone.num_cylinders)) %
z->zone.sec_per_track;
previous_spt = z->zone.sec_per_track;
z = z->next;
}
/* The final physical sector equals the physical sector up to
the particular zone, plus the physical sector caused by the
sector offset into this zone. */
/* Same three terms as in the loop, but only for the tracks and
cylinders that precede `sector' inside its own zone. */
phys = ((phys * z->zone.sec_per_track / previous_spt) +
((sector % z->zone.sec_per_track) +
(z->zone.track_skew * (sector / z->zone.sec_per_track)) +
(long) ceil( (RF_TICS_t) z->zone.sec_per_track *
(disk->geom->seek_constant_coeff) /
disk->geom->revolution_time) *
(sector / (z->zone.sec_per_track *
disk->geom->tracks_per_cyl)))) %
z->zone.sec_per_track;
return( phys );
}
/*
** When each disk starts up, its index mark is a fraction (f) of a rotation
** ahead from its heads (in the direction of rotation). The sector
** under its heads is at a fraction f of a rotation from the index
** mark. After T time has passed, T/rotation_time revolutions have occurred, so
** the sector under the heads is at a fraction FRAC(f+T/rotation_time) of a
** rotation from the index mark. If the target block is at physical sector
** X relative to its index mark, then it is at fraction (X/sectors_per_track),
** so the rotational delay is
** ((X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time
** if this is positive, otherwise it is
** (1+(X/sectors_per_track)-FRAC(f+T/rotation_time)) * revolution_time
*/
#define FRAC(a) ( (a) - (long) floor(a) )
static RF_TICS_t Delay_to(cur_time, block, disk)
RF_TICS_t cur_time;
RF_SectorNum_t block;
RF_DiskState_t *disk;
{
RF_TICS_t tmp;
RF_ZoneList_t *z;
long sector = block * disk->sectors_per_block;
z = disk->geom->zbr_data;
/* This while-loop finds the zone to which sector belongs. */
while (sector >= z->zone.num_sectors) {
sector = sector - z->zone.num_sectors;
z = z->next;
}
tmp = (
(RF_TICS_t) Find_phys_sector(block,disk)/z->zone.sec_per_track
- FRAC(disk->index_offset+cur_time/disk->geom->revolution_time)
) * disk->geom->revolution_time;
if( tmp < 0 ) tmp += disk->geom->revolution_time;
if( tmp < 0 ) RF_PANIC();
return( tmp );
}
/*
 * Time to move the heads from (from_cyl, from_track) to
 * (to_cyl, to_track).
 *
 *   - different cylinder: constant + linear*(d-1) + sqrt_coeff*sqrt(d-1),
 *     d being the cylinder distance, plus the track-skew time of the
 *     destination zone
 *   - same cylinder, different track: the track-skew time of that zone
 *   - same cylinder and track: 0
 *
 * Hmmm...they seem to be computing the head switch time as
 * equal to the track skew penalty.  Is this an approximation?
 * (MCH)
 */
static RF_TICS_t Seek_time( to_cyl, to_track, from_cyl, from_track, disk )
	long to_cyl;
	long to_track;
	long from_cyl;
	long from_track;
	RF_DiskState_t *disk;
{
	/* taken before the zone walks below start rewriting to_cyl/from_cyl */
	long distance = ABS_DIFF( from_cyl, to_cyl ) - 1;
	int cyl_change = (from_cyl != to_cyl);
	RF_TICS_t seek = 0.0;
	RF_ZoneList_t *zp;

	if (!cyl_change && from_track == to_track)
		return seek;

	zp = disk->geom->zbr_data;
	if (cyl_change) {
		/* find the zone containing the destination cylinder */
		for (; to_cyl >= zp->zone.num_cylinders; zp = zp->next)
			to_cyl -= zp->zone.num_cylinders;

		seek = disk->geom->seek_constant_coeff
		    + disk->geom->seek_linear_coeff * distance
		    + disk->geom->seek_sqrt_coeff * sqrt( (double) distance )
		    + zp->zone.track_skew * disk->geom->revolution_time /
		    zp->zone.sec_per_track;
	} else {
		/* head switch only: both tracks lie in the zone of from_cyl */
		for (; from_cyl >= zp->zone.num_cylinders; zp = zp->next)
			from_cyl -= zp->zone.num_cylinders;

		seek = zp->zone.track_skew
		    * disk->geom->revolution_time
		    / zp->zone.sec_per_track;
	}
	return seek;
}
/*
 * Seek time from the disk's current head position to `block'.
 *
 * With a non-zero `update' the head position stored in the disk state is
 * moved to the block's cylinder and track and the time for that move is
 * returned.  With update == 0 the stored position is left alone, so the
 * move is timed from the current position to itself (Seek_time() yields
 * 0 for that).  cur_time is not used.
 */
static RF_TICS_t Seek(cur_time, block, disk, update)
	RF_TICS_t cur_time;
	RF_SectorNum_t block;
	RF_DiskState_t *disk;
	long update;
{
	long from_cyl, from_track;

	RF_ASSERT( block <= disk->last_block_index );

	/* remember where the heads were before any update */
	from_cyl = disk->cur_cyl;
	from_track = disk->cur_track;

	if (update) {
		disk->cur_cyl = Find_cyl( block, disk );
		disk->cur_track = Find_track( block, disk );
	}

	return Seek_time( disk->cur_cyl, disk->cur_track,
	    from_cyl, from_track, disk );
}
/*
 * Rotational latency, starting at cur_time, until `block' is under the
 * heads.  A thin wrapper around Delay_to(); `update' is accepted for
 * symmetry with Seek() but not used.
 */
static RF_TICS_t Rotate(cur_time, block, disk, update)
	RF_TICS_t cur_time;
	RF_SectorNum_t block;
	RF_DiskState_t *disk;
	long update;
{
	RF_TICS_t latency;

	RF_ASSERT( block <= disk->last_block_index );
	latency = Delay_to( cur_time, block, disk );
	return latency;
}
/*
 * Total positioning delay for `block': the seek time plus the rotational
 * latency measured from the moment the seek completes.
 */
static RF_TICS_t Seek_Rotate(cur_time, block, disk, update)
	RF_TICS_t cur_time;
	RF_SectorNum_t block;
	RF_DiskState_t *disk;
	long update;
{
	RF_TICS_t seek;

	RF_ASSERT( block <= disk->last_block_index );

	seek = Seek( cur_time, block, disk, update );
	/* rotation is timed from cur_time + seek, i.e. once the heads have arrived */
	return seek + Rotate( cur_time + seek, block, disk, update );
}
/*
 * Gap time for a track with sec_per_track sectors: one hundredth of the
 * time one sector takes to pass under the heads.
 */
static RF_TICS_t GAP(sec_per_track, disk)
	long sec_per_track;
	RF_DiskState_t *disk;
{
	return disk->geom->revolution_time / (100 * sec_per_track);
}
/*
 * Time to transfer `numblocks' blocks starting at `block', beginning at
 * cur_time with the heads already positioned on the first block.
 *
 * The transfer is processed in runs that end at a track boundary.  Each
 * run costs its share of a revolution minus one GAP() (the access ends
 * before the gap to the next block); between runs the positioning delay
 * returned by Seek_Rotate() is added.
 */
RF_TICS_t Block_access_time(cur_time, block, numblocks, disk, update)
	RF_TICS_t cur_time;
	RF_SectorNum_t block;
	RF_SectorCount_t numblocks;
	RF_DiskState_t *disk;
	long update;
{
	RF_TICS_t elapsed = 0;
	long next = block;
	long stop = block + numblocks;
	long offset, run, left_on_track;
	RF_ZoneList_t *zp;

	RF_ASSERT( numblocks > 0 && stop-1 <= disk->last_block_index );

	while (next < stop) {
		/* find the zone of the next block to learn its sectors per track */
		offset = next * disk->sectors_per_block;
		for (zp = disk->geom->zbr_data; offset >= zp->zone.num_sectors; zp = zp->next)
			offset -= zp->zone.num_sectors;

		/* this run: the rest of the request, or up to the end of the track */
		run = stop - next;
		left_on_track = zp->zone.sec_per_track - next % zp->zone.sec_per_track;
		if (!(run < left_on_track))
			run = left_on_track;

		elapsed += run * disk->geom->revolution_time /
		    zp->zone.sec_per_track -
		    GAP(zp->zone.sec_per_track, disk);
		next += run;

		/* more to transfer: reposition onto the following track */
		if (next != stop)
			elapsed += Seek_Rotate( cur_time + elapsed, next, disk, update );
	}
	return elapsed;
}
/*
 * Reset every statistics accumulator of a disk.  When rf_diskTrace is
 * set, also open the disk's trace file for writing; its name is built
 * from the disk's row and column.  Panics if that file cannot be opened.
 */
static void Zero_stats(disk)
	RF_DiskState_t *disk;
{
	char traceFileName[64];

	disk->stats.num_events = 0;

	/* sums and sums of squares per access phase */
	disk->stats.seek_sum = 0;
	disk->stats.seekSq_sum = 0;
	disk->stats.rotate_sum = 0;
	disk->stats.rotateSq_sum = 0;
	disk->stats.transfer_sum = 0;
	disk->stats.transferSq_sum = 0;
	disk->stats.access_sum = 0;
	disk->stats.accessSq_sum = 0;

	/* time spent per power/activity state */
	disk->stats.sleep_sum = 0;
	disk->stats.idle_sum = 0;
	disk->stats.rw_sum = 0;
	disk->stats.spinup_sum = 0;
	disk->stats.last_acc = 0;

	if (!rf_diskTrace)
		return;

	sprintf (traceFileName,"rf_diskTracer%dc%d\0",disk->row,disk->col);
	disk->traceFile = fopen(traceFileName, "w");
	if (disk->traceFile == NULL) {
		perror(traceFileName);
		RF_PANIC();
	}
}
/*
 * Account for one access that starts at cur_time and takes the given
 * seek, rotate and transfer times.
 *
 * The interval since the previous access counts as idle time up to
 * geom->time_to_sleep; anything beyond that counts as sleep, and the
 * access is then additionally charged geom->time_to_spinup (which is
 * also stored in rf_globalSpinup).  When rf_diskTrace is set the state
 * changes are written to the disk's trace file as "time level" pairs.
 *
 * Returns the spin-up time charged (0 if the disk was not asleep).
 */
static RF_TICS_t Update_stats(cur_time, seek, rotate, transfer, disk)
	RF_TICS_t cur_time;
	RF_TICS_t seek;
	RF_TICS_t rotate;
	RF_TICS_t transfer;
	RF_DiskState_t *disk;
{
	RF_TICS_t spinup = 0;
	RF_TICS_t sleep = 0;
	RF_TICS_t idle = 0;
	RF_TICS_t busy = seek + rotate + transfer;
	RF_TICS_t since_last, idle_end, ready, done;

	disk->stats.num_events++;
	disk->stats.seek_sum += seek;
	disk->stats.seekSq_sum += seek * seek;
	disk->stats.rotate_sum += rotate;
	disk->stats.rotateSq_sum += rotate * rotate;
	disk->stats.transfer_sum += transfer;
	disk->stats.transferSq_sum += transfer * transfer;
	disk->stats.access_sum += busy;
	disk->stats.accessSq_sum += busy * busy;

	/* split the quiet period since the last access into idle and sleep */
	since_last = cur_time - disk->stats.last_acc;
	if (since_last > disk->geom->time_to_sleep) {
		idle = disk->geom->time_to_sleep;
		sleep = since_last - idle;
		spinup = disk->geom->time_to_spinup;
		rf_globalSpinup = spinup;
	} else {
		idle = since_last;
	}

	disk->stats.sleep_sum += sleep;
	disk->stats.idle_sum += idle;
	disk->stats.rw_sum += busy;
	disk->stats.spinup_sum += spinup;

	idle_end = disk->stats.last_acc + idle;
	ready = cur_time + spinup;		/* moment the access proper begins */
	done = ready + seek + rotate + transfer;

	if (rf_diskTrace) {
		fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0);
		fprintf(disk->traceFile,"%g %g\n",idle_end,2.0);
		if (sleep) {
			fprintf(disk->traceFile,"%g %g\n",idle_end,1.0);
			fprintf(disk->traceFile,"%g %g\n",(idle_end+sleep),1.0);
		}
		if (spinup) {
			fprintf(disk->traceFile,"%g %g\n",(cur_time),4.0);
			fprintf(disk->traceFile,"%g %g\n",ready,4.0);
		}
		fprintf(disk->traceFile,"%g %g\n",ready,3.0);
		fprintf(disk->traceFile,"%g %g\n",done,3.0);
	}

	disk->stats.last_acc = done;
	return (spinup);
}
/*
 * Close the books for a disk at cur_time: the interval since its last
 * access is added to the idle total (up to geom->time_to_sleep) and the
 * remainder, if any, to the sleep total.  When rf_diskTrace is set the
 * final records are written and the trace file is closed.
 */
void rf_StopStats(disk, cur_time)
	RF_DiskState_t *disk;
	RF_TICS_t cur_time;
{
	RF_TICS_t sleep = 0;
	RF_TICS_t idle = 0;
	RF_TICS_t since_last = cur_time - disk->stats.last_acc;
	RF_TICS_t idle_end;

	if (since_last > disk->geom->time_to_sleep) {
		sleep = since_last - disk->geom->time_to_sleep;
		idle = disk->geom->time_to_sleep;
	} else {
		idle = since_last;
	}

	disk->stats.sleep_sum += sleep;
	disk->stats.idle_sum += idle;

	if (!rf_diskTrace)
		return;

	idle_end = disk->stats.last_acc + idle;
	fprintf(disk->traceFile,"%g %g\n",disk->stats.last_acc,2.0);
	fprintf(disk->traceFile,"%g %g\n",idle_end,2.0);
	if (sleep) {
		fprintf(disk->traceFile,"%g %g\n",idle_end,1.0);
		fprintf(disk->traceFile,"%g %g\n",(idle_end+sleep),1.0);
	}
	fclose(disk->traceFile);
}
/*
 * Copy a disk's statistics out to the caller: the number of events, the
 * per-event averages of seek, rotate, transfer and total access time,
 * and the accumulated sleep, idle, read/write and spin-up times.
 *
 * Sometimes num_events is zero because the disk was failed at the start
 * of the simulation and never replaced.  Dividing by it crashes on some
 * architectures, so the averages are reported as 0 in that case.
 */
void rf_Report_stats(
	RF_DiskState_t *disk,
	long *numEventsPtr,
	RF_TICS_t *avgSeekPtr,
	RF_TICS_t *avgRotatePtr,
	RF_TICS_t *avgTransferPtr,
	RF_TICS_t *avgAccessPtr,
	RF_TICS_t *SleepPtr,
	RF_TICS_t *IdlePtr,
	RF_TICS_t *RwPtr,
	RF_TICS_t *SpinupPtr)
{
	int have_events = (disk->stats.num_events != 0);

	*numEventsPtr = disk->stats.num_events;

	*avgSeekPtr = have_events ?
	    disk->stats.seek_sum / disk->stats.num_events : 0;
	*avgRotatePtr = have_events ?
	    disk->stats.rotate_sum / disk->stats.num_events : 0;
	*avgTransferPtr = have_events ?
	    disk->stats.transfer_sum / disk->stats.num_events : 0;
	*avgAccessPtr = have_events ?
	    disk->stats.access_sum / disk->stats.num_events : 0;

	*SleepPtr = disk->stats.sleep_sum;
	*IdlePtr = disk->stats.idle_sum;
	*RwPtr = disk->stats.rw_sum;
	*SpinupPtr = disk->stats.spinup_sum;
}
/*
 * Compute how long an access of `numblocks' blocks starting at `block'
 * takes when issued at cur_time.
 *
 *   *media_done_time  seek + rotate + transfer
 *   *access_time      the same plus any spin-up time charged by
 *                     Update_stats()
 *   update            1 => update disk state and statistics, 0 => don't
 *
 * Always returns 0.
 */
int rf_Access_time( access_time, cur_time, block, numblocks, disk, media_done_time, update )
	RF_TICS_t *access_time;
	RF_TICS_t cur_time;
	RF_SectorNum_t block;
	RF_SectorCount_t numblocks;
	RF_DiskState_t *disk;
	RF_TICS_t *media_done_time;
	long update;
{
	RF_TICS_t seek, rotate, transfer, media;
	RF_TICS_t spinup = 0;

	/*
	 * First move to the start of the data, then sweep to the end.
	 * Each phase is timed from the end of the previous one, and Seek()
	 * may change the stored head position, so the order is fixed.
	 */
	seek = Seek( cur_time, block, disk, update );
	rotate = Rotate( cur_time + seek, block, disk, update );
	transfer = Block_access_time( cur_time + seek + rotate, block,
	    numblocks, disk, update );

	if (update)
		spinup = Update_stats( cur_time, seek, rotate, transfer, disk );

	media = seek + rotate + transfer;
	*media_done_time = media;
	*access_time = media + spinup;
	return (0);
}
/*
 * Report the capacity of a simulated disk.
 *
 *   *numBlocks  number of blocks on the disk less rf_protectedSectors
 *               (added to take into account the fact that the mapping
 *               code accounts for the disk label)
 *   *blockSize  size of one block in bytes, at 512 bytes per sector
 */
void rf_GeometryDoReadCapacity(disk, numBlocks, blockSize)
	RF_DiskState_t *disk;
	RF_SectorCount_t *numBlocks;
	int *blockSize;
{
	*numBlocks = disk->last_block_index + 1 - rf_protectedSectors;
	*blockSize = disk->sectors_per_block * 512;	/* in bytes */
}
/* END GEOMETRY ROUTINES **********************************************/
/*
 * Turn a disk's seek specification into the coefficients used by
 * Seek_time():
 *
 *   *a  square-root coefficient
 *   *b  linear coefficient
 *   *c  constant term (always minSeek)
 *
 * When the minimum, average and maximum seek times are all equal the
 * curve is flat: *a and *b are 0.
 */
static void rf_DiskParam(numCyls, minSeek, avgSeek, maxSeek, a, b, c)
	long numCyls;
	RF_TICS_t minSeek;
	RF_TICS_t avgSeek;
	RF_TICS_t maxSeek;
	RF_TICS_t *a;
	RF_TICS_t *b;
	RF_TICS_t *c;
{
	int flat = (minSeek == avgSeek && minSeek == maxSeek);

	if (flat) {
		*a = 0.0;
		*b = 0.0;
		*c = minSeek;
		return;
	}

	*a = ( 15 * avgSeek - 10 * minSeek - 5 * maxSeek ) / ( 3 * sqrt( (double) numCyls ));
	*b = ( 7 * minSeek + 8 * maxSeek - 15 * avgSeek ) / ( 3 * numCyls );
	*c = minSeek;
}

View File

@ -0,0 +1,154 @@
/* $NetBSD: rf_geometry.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/* geometry.h
* code from raidSim to model disk behavior
*/
/*
* Changes:
* 8/18/92 Additional structures have been declared and existing
* structures have been modified in order to support zone-
* bit recording.
* (AS) 1. The types 'Zone_data' and 'Zone_list' have been defined.
* (AS) 2. The type 'Geometry' has been modified.
*/
/* :
* Log: rf_geometry.h,v
* Revision 1.10 1996/08/06 22:25:08 jimz
* include raidframe stuff before system stuff
*
* Revision 1.9 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.8 1996/05/31 10:16:14 jimz
* add raidsim note
*
* Revision 1.7 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.6 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.5 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.4 1995/12/01 18:29:45 root
* added copyright info
*
*/
#ifndef _RF__RF_GEOMETRY_H_
#define _RF__RF_GEOMETRY_H_
#include "rf_types.h"
#include "rf_sys.h"
#ifndef _KERNEL
#include <string.h>
#include <math.h>
#ifdef __NetBSD__
#include <stdio.h>
#endif /* __NetBSD__ */
#endif
#define RF_MAX_DISKNAME_LEN 80
/*
 * Parameters of one zone on a zone-bit-recorded (ZBR) disk.
 */
typedef struct RF_ZoneData_s {
long num_cylinders; /* Number of cylinders in zone */
long sec_per_track; /* Sectors per track in zone */
long track_skew; /* Skew of each track in zone */
long num_sectors; /* Number of sectors in zone */
} RF_ZoneData_t;
/*
 * Linked list containing zone data: each node embeds one RF_ZoneData_t
 * and points at the next node.
 */
typedef struct RF_ZoneList_s RF_ZoneList_t;
struct RF_ZoneList_s {
RF_ZoneData_t zone; /* for each disk */
RF_ZoneList_t *next; /* following node in the list */
};
/*
 * Description of one type of disk: name, mechanical layout, seek-time
 * model and sleep/spin-up timing.
 */
typedef struct RF_Geometry_s {
char disk_name[RF_MAX_DISKNAME_LEN]; /* name for a type of disk */
long tracks_per_cyl; /* tracks in a cylinder */
/* assume 1 head per track, 1 set of read/write electronics */
long num_zones; /* number of ZBR zones on disk */
RF_TICS_t revolution_time; /* milliseconds per revolution */
RF_TICS_t seek_one_cyl; /* adjacent cylinder seek time */
RF_TICS_t seek_max_stroke; /* end to end seek time */
RF_TICS_t seek_avg; /* random from/to average time */
/*
* seek time = a * (x-1)^0.5 + b * (x-1) + c
* x >= 1 is the seek distance in cylinders
*/
RF_TICS_t seek_sqrt_coeff; /* a */
RF_TICS_t seek_linear_coeff; /* b */
RF_TICS_t seek_constant_coeff; /* c */
RF_ZoneList_t *zbr_data; /* linked list with ZBR data */
RF_TICS_t time_to_sleep; /* seconds of idle time before the disk goes to sleep */
RF_TICS_t time_to_spinup; /* seconds spin up takes */
} RF_Geometry_t;
/*
 * Linked list of disk geometries: each node points at one RF_Geometry_t.
 */
typedef struct RF_GeometryList_s RF_GeometryList_t;
struct RF_GeometryList_s {
RF_Geometry_t *disk; /* geometry described by this node */
RF_GeometryList_t *next; /* following node in the list */
};
/*
 * Per-disk statistics.  rf_Report_stats() reports the transfer and access
 * averages as the matching *_sum divided by num_events, and passes the
 * sleep, idle, rw and spinup sums through unchanged.
 * NOTE(review): the *Sq_sum fields are presumably running sums of squares
 * (for variance) -- confirm where they are accumulated.
 */
typedef struct RF_DiskStats_s {
long num_events; /* divisor used when averaging the sums below */
RF_TICS_t seek_sum;
RF_TICS_t seekSq_sum;
RF_TICS_t rotate_sum;
RF_TICS_t rotateSq_sum;
RF_TICS_t transfer_sum;
RF_TICS_t transferSq_sum;
RF_TICS_t access_sum;
RF_TICS_t accessSq_sum;
RF_TICS_t sleep_sum;
RF_TICS_t idle_sum;
RF_TICS_t rw_sum;
RF_TICS_t spinup_sum;
RF_TICS_t last_acc; /* time the last access was finished */
} RF_DiskStats_t;
/*
 * State of one modelled disk: its position in the array, its geometry,
 * formatting results, current head position and accumulated statistics.
 */
struct RF_DiskState_s {
int row; /* array row of this disk */
int col; /* array column of this disk */
RF_Geometry_t *geom; /* geometry of this disk's type */
long sectors_per_block; /* formatted per disk */
long last_block_index; /* format result for convenience */
RF_TICS_t index_offset; /* powerup head offset to index mark */
long cur_track; /* current track */
long cur_cyl; /* current cylinder */
RF_DiskStats_t stats; /* disk statistics */
RF_TICS_t queueFinishTime; /* used by shortest-seek code */
long lastBlock;
FILE *traceFile;
};
typedef struct RF_DiskState_s RF_DiskState_t;
/*
 * Public interface of the disk model.
 */
extern RF_TICS_t rf_globalSpinup;
void rf_InitDisk(RF_DiskState_t *disk, char *disk_name, char *disk_db, long init_cyl,
long init_track, RF_TICS_t init_offset, int row, int col);
void rf_StopStats(RF_DiskState_t *disk, RF_TICS_t cur_time);
/* copy the disk's statistics out through the pointer arguments */
void rf_Report_stats(RF_DiskState_t *disk, long *numEventsPtr, RF_TICS_t *avgSeekPtr,
RF_TICS_t *avgRotatePtr, RF_TICS_t *avgTransferPtr, RF_TICS_t *avgAccessPtr,
RF_TICS_t *SleepPtr, RF_TICS_t *IdlePtr, RF_TICS_t *RwPtr, RF_TICS_t *SpinupPtr);
/*
 * compute the time for an access of numblocks blocks starting at block;
 * update: 1 => update disk state, 0 => don't.  Returns 0.
 */
int rf_Access_time(RF_TICS_t *access_time, RF_TICS_t cur_time,
RF_SectorNum_t block, RF_SectorCount_t numblocks, RF_DiskState_t *disk,
RF_TICS_t *media_done_time, long update);
/* report the disk's block count and its block size in bytes */
void rf_GeometryDoReadCapacity(RF_DiskState_t *disk, RF_SectorCount_t *numBlocks,
int *blockSize);
#endif /* !_RF__RF_GEOMETRY_H_ */

273
sys/dev/raidframe/rf_heap.c Normal file
View File

@ -0,0 +1,273 @@
/* $NetBSD: rf_heap.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* We manage a heap of data,key pairs, where the key is a simple data type
* and the data is any singular data type. We allow the caller to add
* pairs, remove pairs, peek at the top pair, and do delete/add combinations.
* The latter are efficient because we only reheap once.
*
* David Kotz 1990? and 1993
*
* Modify the heap to work with events, with the smallest time on the top.
* Song Bac Toh, 1994
*/
/* :
* Log: rf_heap.c,v
* Revision 1.8 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.7 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.6 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.5 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.4 1995/12/01 19:03:58 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_heap.h"
#include "rf_general.h"
/*
 * Evaluate to non-zero when the two heap requests match, i.e. they name the
 * same disk and carry the same request code.
 *
 * Both arguments are pointers to request records.  Each argument is
 * parenthesized so that any pointer expression (for example &req) can be
 * passed; note that each argument is evaluated twice.
 */
#define Matching_REQUESTS(HeapData1, HeapData2) \
  (((HeapData1)->disk == (HeapData2)->disk) && \
   ((HeapData1)->req_code == (HeapData2)->req_code))
/*
 * Index arithmetic for the implicit binary tree stored in the heap array.
 * Slot 0 of the array is not used: the root lives at index 1, so the
 * children of node p are 2p and 2p+1 and the parent of node c is c/2.
 */
#define ROOT        1
#define LCHILD(p)   ((p) * 2)
#define RCHILD(p)   (LCHILD(p) + 1)
#define PARENT(c)   ((c) / 2)
/* @SUBTITLE "Debugging macros" */
/*
 * The following are used for debugging our callers
 * as well as internal stuff.
 *
 * INVARIANT2(x, y): if condition x is false, print the source file and line
 * followed by the message string y on stderr, then exit(1).
 */
#define CHECK_INVARIANTS 1
#ifdef CHECK_INVARIANTS
/*
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used in an unbraced if/else.  The message is
 * printed through "%s" so that it is never interpreted as a format string.
 */
#define INVARIANT2(x, y) \
  do { \
    if (!(x)) { \
      fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \
              __FILE__, __LINE__); \
      fprintf(stderr, "%s", (y)); \
      exit(1); \
    } \
  } while (0)
/*
#define INVARIANT3(x, y, z) \
{ \
if (!(x)) { \
fprintf(stderr, "INVARIANT false: in \"%s\", line %d\n", \
__FILE__, __LINE__); \
fprintf(stderr, (y), (z)); \
exit(1); \
} \
}
*/
#else /* CHECK_INVARIANTS */
/* #define INVARIANT2(x, y) */
/* #define INVARIANT3(x, y, z) already defined in modularize.h */
#endif /* CHECK_INVARIANTS */
/**** Rachad, must add to general debug structure */
/* @SUBTITLE "InitHeap: Allocate a new heap" */
/*
 * Create an empty heap able to hold maxsize elements.  The entry array gets
 * maxsize+1 slots because slot 0 is never used.
 *
 * Returns the new heap, or NULL (after printing a message on stderr) when
 * either allocation comes back NULL.
 */
RF_Heap_t rf_InitHeap(int maxsize)
{
  RF_Heap_t newHeap;

  RF_ASSERT(maxsize > 0);

  /* the heap header itself */
  RF_Malloc(newHeap, sizeof(struct RF_Heap_s),(RF_Heap_t));
  if (!newHeap) {
    fprintf(stderr, "InitHeap: No memory for heap\n");
    return NULL;
  }

  /* the array of entries */
  RF_Malloc(newHeap->heap,sizeof(RF_HeapEntry_t)*(maxsize+1),(RF_HeapEntry_t *));
  if (!newHeap->heap) {
    fprintf(stderr, "InitHeap: No memory for heap of %d elements\n", maxsize);
    /* -1 means don't cause an error if the size does not match */
    RF_Free(newHeap,-1);
    return NULL;
  }

  newHeap->maxsize = maxsize;
  newHeap->numheap = 0;
  return newHeap;
}
/* @SUBTITLE "FreeHeap: delete a heap" */
/*
 * Release a heap created by rf_InitHeap: first the entry array, then the
 * heap header.  A NULL heap is ignored.
 */
void rf_FreeHeap(RF_Heap_t hp)
{
  if (hp == NULL)
    return;

  RF_Free(hp->heap,sizeof(RF_HeapEntry_t)*(hp->maxsize+1));
  RF_Free(hp,sizeof(struct RF_Heap_s));
}
/* @SUBTITLE "AddHeap: Add an element to the heap" */
/*
 * Insert (data, key) into the heap, keeping the smallest key at the root.
 *
 *   hp   - heap to insert into; must not be NULL
 *   data - payload pointer stored with the key
 *   key  - ordering key
 *
 * The process exits through INVARIANT2 if hp is NULL or the heap is full.
 */
void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key)
{
  int node;

  INVARIANT2(hp != NULL, "AddHeap: NULL heap\n");
  /*
   * The entry array was allocated with hp->maxsize usable slots, so that is
   * the real capacity; checking against the global RF_HEAP_MAX would let a
   * smaller heap write past the end of its array.
   */
  INVARIANT2((hp->numheap < hp->maxsize), "AddHeap: Heap overflowed\n");

  /* use new space end of heap */
  node = ++(hp->numheap);

  /* and reheap: pull larger ancestors down until the new key fits */
  while (node != ROOT && hp->heap[PARENT(node)].key > key) {
    hp->heap[node] = hp->heap[PARENT(node)];
    node = PARENT(node);
  }

  hp->heap[node].data = data;
  hp->heap[node].key = key;
}
/* @SUBTITLE "TopHeap: Return top element of heap" */
/*
 * Peek at the root of the heap without removing it.
 *
 *   data - if non-NULL, receives the root's payload pointer
 *   key  - if non-NULL, receives the root's key
 *
 * Returns RF_HEAP_FOUND when the heap holds at least one element and
 * RF_HEAP_NONE (leaving *data and *key untouched) when it is empty.
 */
int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key)
{
  INVARIANT2(hp != NULL, "TopHeap: NULL heap\n");

  if (hp->numheap <= 0)
    return RF_HEAP_NONE;

  if (data != NULL)
    *data = hp->heap[ROOT].data;
  if (key != NULL)
    *key = hp->heap[ROOT].key;
  return RF_HEAP_FOUND;
}
/* @SUBTITLE "RepHeap: Replace top of heap with given element and reheap" */
/*
 * Replace the root with (data, key) and restore heap order by sifting the
 * new element down.  hp->numheap does not change and should already be > 0;
 * if the heap is empty the element is simply added instead.
 */
void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key)
{
  RF_HeapEntry_t *entries;   /* base of this heap's array */
  int hole;                  /* slot currently waiting to be filled */
  int child;                 /* smaller child of the hole */

  INVARIANT2(hp != NULL, "RepHeap: NULL heap\n");

  /* if used properly this case should never come up */
  if (hp->numheap == 0) {
    rf_AddHeap(hp, data, key);
    return;
  }

  entries = hp->heap;
  hole = ROOT;
  for (;;) {
    child = LCHILD(hole);
    if (child > hp->numheap)
      break;                 /* no children at all */

    /* both children exist: take the right one unless the left is smaller */
    if (RCHILD(hole) <= hp->numheap &&
        !(entries[child].key < entries[RCHILD(hole)].key))
      child = RCHILD(hole);

    if (!(entries[child].key < key))
      break;                 /* new element belongs here */

    /* move the smaller child up and continue below it */
    entries[hole] = entries[child];
    hole = child;
  }

  /* final resting place for new element */
  entries[hole].key = key;
  entries[hole].data = data;
}
/* @SUBTITLE "RemHeap: Remove top element and reheap" */
/*
 * Remove the root of the heap, handing its payload and key back through
 * data and key (see rf_TopHeap, which also validates hp for us).
 *
 * Returns RF_HEAP_FOUND if an element was removed, RF_HEAP_NONE if the heap
 * was empty.
 */
int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key)
{
  int last;

  /* fetch the top element, if any, into data and key */
  if (!rf_TopHeap(hp, data, key))
    return RF_HEAP_NONE;

  /* shrink the heap and re-insert the former last element from the top */
  last = hp->numheap;
  hp->numheap = last - 1;
  if (hp->numheap > 0)
    rf_RepHeap(hp, hp->heap[last].data, hp->heap[last].key);

  return RF_HEAP_FOUND;
}

127
sys/dev/raidframe/rf_heap.h Normal file
View File

@ -0,0 +1,127 @@
/* $NetBSD: rf_heap.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* @TITLE "heap.h - interface to heap management implementation */
/* We manage a heap of data,key pairs, where the key could be any
* simple data type
* and the data is any pointer data type. We allow the caller to add
* pairs, remove pairs, peek at the top pair, and do delete/add combinations.
* The latter are efficient because we only reheap once.
*
* David Kotz 1990? and 1993
*/
/* :
* Log: rf_heap.h,v
* Revision 1.8 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.7 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.6 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.5 1995/12/01 19:04:07 root
* added copyright info
*
*/
#ifndef _RF__RF_HEAP_H_
#define _RF__RF_HEAP_H_
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_desc.h"
#define RF_HEAP_MAX 10240 /* nominal upper bound on the number of heap entries */
#define RF_HEAP_FOUND 1 /* status: an element was present */
#define RF_HEAP_NONE 0 /* status: the heap was empty */
typedef RF_TICS_t RF_HeapKey_t; /* ordering key; smallest key sits at the top */
typedef struct RF_HeapData_s RF_HeapData_t;
typedef struct RF_Heap_s *RF_Heap_t; /* heap handle: a pointer to struct RF_Heap_s */
typedef struct RF_HeapEntry_s RF_HeapEntry_t;
/* heap data: the payload record that heap entries point to */
struct RF_HeapData_s {
RF_TICS_t eventTime;
int disk;
int (*CompleteFunc)(); /* function to be called upon completion */
void *argument; /* argument to be passed to CompleteFunc */
int owner; /* which task is responsible for this request */
int row;
int col; /* coordinates of disk */
RF_Raid_t *raidPtr;
void *diskid;
/* Dag event */
RF_RaidAccessDesc_t *desc;
};
/* one slot of the heap array: a payload pointer and the key it is ordered by */
struct RF_HeapEntry_s {
RF_HeapData_t *data; /* the arbitrary data */
RF_HeapKey_t key; /* key for comparison */
};
/* heap header: the entry array plus its current and requested sizes */
struct RF_Heap_s {
RF_HeapEntry_t *heap; /* the heap in use (an array) */
int numheap; /* number of elements in heap */
int maxsize; /* capacity passed to rf_InitHeap */
};
/* set up heap to hold maxsize nodes; may return NULL if there is no memory */
RF_Heap_t rf_InitHeap(int maxsize);
/* delete a heap data structure (a NULL heap is ignored) */
void rf_FreeHeap(RF_Heap_t hp);
/* add the element to the heap */
void rf_AddHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key);
/* return top of the heap, without removing it from heap (RF_HEAP_NONE if empty) */
int rf_TopHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key);
/* replace the heap's top item with a new item, and reheap */
void rf_RepHeap(RF_Heap_t hp, RF_HeapData_t *data, RF_HeapKey_t key);
/* remove the heap's top item, if any (RF_HEAP_NONE if empty heap) */
int rf_RemHeap(RF_Heap_t hp, RF_HeapData_t **data, RF_HeapKey_t *key);
#endif /* !_RF__RF_HEAP_H_ */

View File

@ -0,0 +1,72 @@
/* $NetBSD: rf_hist.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* rf_hist.h
*
* Histogram operations for RAIDframe stats
*/
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* :
* Log: rf_hist.h,v
* Revision 1.3 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.2 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.1 1996/05/31 10:33:05 jimz
* Initial revision
*
*/
#ifndef _RF__RF_HIST_H_
#define _RF__RF_HIST_H_
#include "rf_types.h"
/*
 * Histogram shape.  Values are bucketed after RF_HIST_ADD has divided them
 * by 1000; anything at or above RF_HIST_MAX_VAL lands in the last bucket.
 */
#define RF_HIST_RESOLUTION 5 /* width of one bucket */
#define RF_HIST_MIN_VAL 0 /* lowest bucketed value */
#define RF_HIST_MAX_VAL 1000 /* values >= this go to the last bucket */
#define RF_HIST_RANGE (RF_HIST_MAX_VAL - RF_HIST_MIN_VAL)
#define RF_HIST_NUM_BUCKETS (RF_HIST_RANGE / RF_HIST_RESOLUTION + 1) /* regular buckets plus one overflow bucket */
typedef RF_uint32 RF_Hist_t; /* type of one bucket counter */
/*
 * RF_HIST_ADD(hist, val): count one sample in the histogram array hist.
 *
 * The sample is converted to RF_Hist_t and divided by 1000.  A scaled value
 * at or above RF_HIST_MAX_VAL is counted in the last bucket; any other
 * value is counted in bucket (value - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION.
 *
 * The temporary has a reserved-looking name so that it cannot shadow a
 * caller variable used in the val argument (a caller passing a variable
 * named "val" would otherwise read the uninitialized temporary), and the
 * hist argument is parenthesized so any array expression may be passed.
 * The expansion is kept as a bare braced block so that existing call sites
 * continue to compile unchanged.
 */
#define RF_HIST_ADD(_hist_,_val_) { \
	RF_Hist_t _rf_hist_scaled_; \
	_rf_hist_scaled_ = ((RF_Hist_t)(_val_)) / 1000; \
	if (_rf_hist_scaled_ >= RF_HIST_MAX_VAL) \
		(_hist_)[RF_HIST_NUM_BUCKETS-1]++; \
	else \
		(_hist_)[(_rf_hist_scaled_ - RF_HIST_MIN_VAL) / RF_HIST_RESOLUTION]++; \
}
#endif /* !_RF__RF_HIST_H_ */

View File

@ -0,0 +1,360 @@
/* $NetBSD: rf_interdecluster.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/************************************************************
*
* rf_interdecluster.c -- implements interleaved declustering
*
************************************************************/
/* :
* Log: rf_interdecluster.c,v
* Revision 1.24 1996/08/02 13:20:38 jimz
* get rid of bogus (long) casts
*
* Revision 1.23 1996/07/31 16:56:18 jimz
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
*
* Revision 1.22 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.21 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.20 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.19 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.18 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.17 1996/06/11 15:17:55 wvcii
* added include of rf_interdecluster.h
* fixed parameter list of rf_ConfigureInterDecluster
* fixed return type of rf_GetNumSparePUsInterDecluster
* removed include of rf_raid1.h
*
* Revision 1.16 1996/06/11 08:55:15 jimz
* improved error-checking at configuration time
*
* Revision 1.15 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.14 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.13 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.12 1996/06/06 18:41:48 jimz
* add interleaved declustering dag selection
*
* Revision 1.11 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.10 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.9 1996/05/31 05:03:01 amiri
* fixed a bug related to sparing layout.
*
* Revision 1.8 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.7 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.6 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.5 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.4 1996/05/03 19:50:38 wvcii
* removed include of rf_redstripe.h
* fixed change log parameters in header
*
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_interdecluster.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_general.h"
#include "rf_utils.h"
#include "rf_dagffrd.h"
#include "rf_dagdegrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegwr.h"
/*
 * Layout-specific state for interleaved declustering; allocated and filled
 * in by rf_ConfigureInterDecluster and stored in Layout.layoutSpecificInfo.
 */
typedef struct RF_InterdeclusterConfigInfo_s {
RF_RowCol_t **stripeIdentifier; /* filled in at config time
* and used by IdentifyStripe */
RF_StripeCount_t numSparingRegions; /* used stripe units per disk / (2*numCol) */
RF_StripeCount_t stripeUnitsPerSparingRegion; /* numCol * (numCol-1) */
RF_SectorNum_t mirrorStripeOffset; /* numSparingRegions * (numCol+1); scaled by
* sectorsPerStripeUnit when mapping parity */
} RF_InterdeclusterConfigInfo_t;
/*
 * Configure an array for the interleaved declustering layout: allocate the
 * layout-specific info, build the stripe identifier table and fill in the
 * remaining layout parameters.
 *
 * listp and cfgPtr are not referenced here.
 * Returns 0 on success, ENOMEM if either allocation comes back NULL.
 */
int rf_ConfigureInterDecluster(
RF_ShutdownList_t **listp,
RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr)
{
RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
RF_StripeCount_t num_used_stripeUnitsPerDisk;
RF_InterdeclusterConfigInfo_t *info;
RF_RowCol_t i, tmp, SUs_per_region;
/* create an Interleaved Declustering configuration structure */
RF_MallocAndAdd(info, sizeof(RF_InterdeclusterConfigInfo_t), (RF_InterdeclusterConfigInfo_t *),
raidPtr->cleanupList);
if (info == NULL)
return(ENOMEM);
layoutPtr->layoutSpecificInfo = (void *) info;
/* fill in the config structure. */
SUs_per_region = raidPtr->numCol * (raidPtr->numCol - 1);
info->stripeIdentifier = rf_make_2d_array(SUs_per_region, 2 , raidPtr->cleanupList);
if (info->stripeIdentifier == NULL)
return(ENOMEM);
/* for each stripe unit of a region record its two columns: entry [0] uses the
 * same formula as the primary column in MapSector, entry [1] the same formula
 * as the mirror column in MapParity */
for (i=0; i< SUs_per_region; i++) {
info->stripeIdentifier[i][0] = i / (raidPtr->numCol-1);
tmp = i / raidPtr->numCol;
info->stripeIdentifier[i][1] = (i+1+tmp) % raidPtr->numCol;
}
/* no spare tables */
RF_ASSERT(raidPtr->numRow == 1);
/* fill in the remaining layout parameters */
/* total number of stripes should be a multiple of 2*numCol: Each sparing region consists of
2*numCol stripes: n-1 primary copy, n-1 secondary copy and 2 for spare .. */
/* round the per-disk stripe unit count down to a multiple of 2*numCol */
num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
(2*raidPtr->numCol) );
info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol);
/* this is in fact the number of stripe units (that are primary data copies) in the sparing region */
info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol+1);
layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
/* one data unit and one mirror ("parity") unit per stripe */
layoutPtr->numDataCol = 1;
layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
layoutPtr->numParityCol = 1;
layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
raidPtr->sectorsPerDisk =
num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
raidPtr->totalSectors =
(layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
/* recompute from the rounded-down sectorsPerDisk */
layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
return(0);
}
/*
 * Default number of floating reconstruction buffers for this layout.
 * The value is a fixed 30; raidPtr is not consulted.
 */
int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr)
{
  return 30;
}
/*
 * Default head separation limit for this layout: the array's
 * sectorsPerDisk value.
 */
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr)
{
  RF_HeadSepLimit_t limit = raidPtr->sectorsPerDisk;

  return limit;
}
/*
 * Number of spare reconstruction units per disk: twice the number of
 * sparing regions, because the layout uses two stripe units per disk as
 * spare within each sparing region.
 */
RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(
  RF_Raid_t  *raidPtr)
{
  RF_InterdeclusterConfigInfo_t *cfg;
  RF_ReconUnitCount_t regions;

  cfg = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  regions = (RF_ReconUnitCount_t) cfg->numSparingRegions;
  return 2 * regions;
}
/*
 * Maps to the primary copy of the data, i.e. the first mirror pair.
 *
 *   raidSector - address within the array, in sectors
 *   row, col   - out: disk holding the sector (*row is always 0)
 *   diskSector - out: sector offset on that disk
 *   remap      - zero: map to the normal location; non-zero: map to the
 *                spare space of the sector's sparing region
 *
 * When remapping, the column starts from the mirror column formula (the one
 * used by rf_MapParityInterDecluster), moves one column further, and moves
 * once more if that lands on the primary column.
 */
void rf_MapSectorInterDecluster(
  RF_Raid_t       *raidPtr,
  RF_RaidAddr_t    raidSector,
  RF_RowCol_t     *row,
  RF_RowCol_t     *col,
  RF_SectorNum_t  *diskSector,
  int              remap)
{
  RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
  RF_StripeNum_t su_offset_into_disk, mirror_su_offset_into_disk;
  RF_StripeNum_t sparing_region_id, index_within_region;
  int col_before_remap;

  *row = 0;
  /* locate the stripe unit: which sparing region, and where inside it */
  sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
  index_within_region = SUID % info->stripeUnitsPerSparingRegion;
  su_offset_into_disk = index_within_region % (raidPtr->numCol-1);
  mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
  col_before_remap = index_within_region / (raidPtr->numCol-1);

  if (!remap) {
    *col = col_before_remap;
    *diskSector = ( su_offset_into_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) *
                  raidPtr->Layout.sectorsPerStripeUnit;
    /* offset of the sector inside its stripe unit */
    *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
  }
  else {
    /* remap sector to spare space...*/
    *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
    *diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
    *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    *col = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
    *col = (*col + 1) % raidPtr->numCol;
    if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol;
  }
}
/*
 * Maps to the second copy of the mirror pair.
 *
 * raidSector is the array address in sectors; the disk holding the mirror
 * copy is returned in *row (always 0) and *col, and the sector offset on
 * that disk in *diskSector.  A non-zero remap maps the copy to the spare
 * space of its sparing region instead of its normal location.
 */
void rf_MapParityInterDecluster(
RF_Raid_t *raidPtr,
RF_RaidAddr_t raidSector,
RF_RowCol_t *row,
RF_RowCol_t *col,
RF_SectorNum_t *diskSector,
int remap)
{
RF_InterdeclusterConfigInfo_t *info = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
RF_StripeNum_t sparing_region_id, index_within_region, mirror_su_offset_into_disk;
RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
int col_before_remap;
/* locate the stripe unit: which sparing region, and where inside it */
sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
index_within_region = SUID % info->stripeUnitsPerSparingRegion;
mirror_su_offset_into_disk = index_within_region / raidPtr->numCol;
col_before_remap = (index_within_region + 1 + mirror_su_offset_into_disk) % raidPtr->numCol;
*row = 0;
if (!remap) {
*col = col_before_remap;
/* mirror copies start mirrorStripeOffset stripe units into the disk */
*diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
*diskSector += sparing_region_id * (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
*diskSector += mirror_su_offset_into_disk * raidPtr->Layout.sectorsPerStripeUnit;
/* offset of the sector inside its stripe unit */
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
}
else {
/* remap parity to spare space ... */
*diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
*diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
/* start from the primary column formula (as in MapSector), move one column
 * further, and once more if that lands on the mirror column */
*col = index_within_region / (raidPtr->numCol-1);
*col = (*col + 1) % raidPtr->numCol;
if (*col == col_before_remap) *col = (*col + 1) % raidPtr->numCol;
}
}
/*
 * Look up the disks of the stripe containing addr.
 *
 *   diskids - out: points at the stripe identifier table row built at
 *             configuration time for this stripe unit's position in its
 *             sparing region
 *   outRow  - out: always 0
 */
void rf_IdentifyStripeInterDecluster(
  RF_Raid_t      *raidPtr,
  RF_RaidAddr_t   addr,
  RF_RowCol_t   **diskids,
  RF_RowCol_t    *outRow)
{
  RF_InterdeclusterConfigInfo_t *cfg = (RF_InterdeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_StripeNum_t SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;

  /* reduce to an index within the sparing region */
  SUID %= cfg->stripeUnitsPerSparingRegion;

  *outRow = 0;
  *diskids = cfg->stripeIdentifier[ SUID ];
}
/*
 * Map a stripe ID to a parity stripe ID.  For this layout the mapping is
 * the identity and the reconstruction unit is always 0; layoutPtr is not
 * consulted.
 */
void rf_MapSIDToPSIDInterDecluster(
  RF_RaidLayout_t    *layoutPtr,
  RF_StripeNum_t      stripeID,
  RF_StripeNum_t     *psID,
  RF_ReconUnitNum_t  *which_ru)
{
  *which_ru = 0;          /* single reconstruction unit */
  *psID = stripeID;       /* identity mapping */
}
/******************************************************************************
 * select a graph to perform a single-stripe access
 *
 * Parameters:  raidPtr    - description of the physical array
 *              type       - type of operation (read or write) requested
 *              asmap      - logical & physical addresses for this access
 *              createFunc - out: function to use to create the graph, or
 *                           NULL when more than one disk of the group has
 *                           failed and the access must be aborted
 *
 * Reads use the mirror-partition read graph when no data unit has failed
 * and the RAID 1 degraded read graph otherwise; writes always use the
 * RAID 1 write graph.
 *****************************************************************************/
void rf_RAIDIDagSelect(
  RF_Raid_t             *raidPtr,
  RF_IoType_t            type,
  RF_AccessStripeMap_t  *asmap,
  RF_VoidFuncPtr        *createFunc)
{
  RF_ASSERT(RF_IO_IS_R_OR_W(type));

  if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n");
    *createFunc = NULL;
    return;
  }

  /*
   * The selection below assigns *createFunc on every path, so no
   * preliminary default assignment is needed.
   */
  if (type == RF_IO_TYPE_READ) {
    if (asmap->numDataFailed == 0)
      *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG;
    else
      *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG;
  }
  else
    *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
}

View File

@ -0,0 +1,111 @@
/* $NetBSD: rf_interdecluster.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Khalil Amiri
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_interdecluster.h
* header file for Interleaved Declustering
*/
/*
* :
* Log: rf_interdecluster.h,v
* Revision 1.13 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.12 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.11 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.10 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.9 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.8 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.7 1996/06/06 18:41:58 jimz
* add RAIDIDagSelect
*
* Revision 1.6 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.5 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.4 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1995/12/01 19:07:25 root
* added copyright info
*
* Revision 1.1 1995/11/28 21:38:27 amiri
* Initial revision
*/
#ifndef _RF__RF_INTERDECLUSTER_H_
#define _RF__RF_INTERDECLUSTER_H_
/*
* Entry points of the Interleaved Declustering layout.  These are the
* routines installed in the 'I' entry of the layout switch in rf_layout.c.
* This header declares no types of its own; the RF_* types used below
* must already be visible to the includer.
*/
int rf_ConfigureInterDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
int rf_GetDefaultNumFloatingReconBuffersInterDecluster(RF_Raid_t *raidPtr);
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitInterDecluster(RF_Raid_t *raidPtr);
RF_ReconUnitCount_t rf_GetNumSpareRUsInterDecluster(RF_Raid_t *raidPtr);
void rf_MapSectorInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
void rf_MapParityInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* returns, via diskids, the stripeIdentifier row for addr; *outRow is set to 0 */
void rf_IdentifyStripeInterDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
/* identity mapping: *psID = stripeID, *which_ru = 0 */
void rf_MapSIDToPSIDInterDecluster(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
RF_ReconUnitNum_t *which_ru);
/* stores the DAG creation routine in *createFunc, or NULL if more than
* one unit in the access has failed */
void rf_RAIDIDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
#endif /* !_RF__RF_INTERDECLUSTER_H_ */

View File

@ -0,0 +1,54 @@
/* $NetBSD: rf_invertq.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Daniel Stodolsky
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* :
* Log: rf_invertq.c,v
* Revision 1.5 1996/07/29 16:36:36 jimz
* include rf_archs.h here, not rf_invertq.h, to avoid VPATH
* problems in OSF/1 kernel
*
* Revision 1.4 1995/11/30 15:57:27 wvcii
* added copyright info
*
*/
/*
* This file contains no code of its own: it only selects and includes
* rf_invertq.h, which holds the table definitions.
*/
#ifdef _KERNEL
#define KERNEL
#endif
/* rf_archs.h is included here, ahead of rf_invertq.h, rather than from
* inside rf_invertq.h (see the revision 1.5 log entry above) */
#include "rf_archs.h"
#include "rf_pqdeg.h"
#ifdef KERNEL
#ifndef __NetBSD__
#include <raidframe/du_data/rf_invertq.h>
#else
#include "rf_invertq.h" /* XXX this is a hack. */
#endif /* !__NetBSD__ */
#else /* KERNEL */
#include "rf_invertq.h"
#endif /* KERNEL */

View File

@ -0,0 +1,72 @@
/* $NetBSD: rf_invertq.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* rf_invertq.h
*/
/*
* This is normally a generated file. Not so for NetBSD.
*/
#ifndef _RF__RF_INVERTQ_H_
#define _RF__RF_INVERTQ_H_
#ifdef _KERNEL
#define KERNEL
#endif
/*
* rf_geniq.c must include rf_archs.h before including
* this file (to get VPATH magic right with the way we
* generate this file in kernel trees)
*/
/* #include "rf_archs.h" */
#if (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
/* presumably the number of entries per table row (each row below has 32) */
#define RF_Q_COLS 32
/*
* rf_rn[i] is 2^i in GF(2^5) with reducing polynomial x^5 + x^2 + 1:
* each entry is the previous one doubled, XORed with 0x25 whenever the
* result no longer fits in five bits.  The sequence has period 31, so
* entry 31 is 1 again.
*/
RF_ua32_t rf_rn = {
1, 2, 4, 8, 16, 5, 10, 20, 13, 26, 17, 7, 14, 28, 29, 31, 27, 19, 3, 6, 12, 24, 21, 15, 30, 25, 23, 11, 22, 9, 18, 1, };
/*
* Multiplication table: rf_qfor[i][j] is the product j * 2^i in GF(2^5)
* with reducing polynomial x^5 + x^2 + 1.  Row 0 is therefore the
* identity, and row 31 repeats row 0 because 2^31 == 1 in this field.
*/
RF_ua32_t rf_qfor[32] = {
/* i = 0 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, },
/* i = 1 */ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 5, 7, 1, 3, 13, 15, 9, 11, 21, 23, 17, 19, 29, 31, 25, 27, },
/* i = 2 */ { 0, 4, 8, 12, 16, 20, 24, 28, 5, 1, 13, 9, 21, 17, 29, 25, 10, 14, 2, 6, 26, 30, 18, 22, 15, 11, 7, 3, 31, 27, 23, 19, },
/* i = 3 */ { 0, 8, 16, 24, 5, 13, 21, 29, 10, 2, 26, 18, 15, 7, 31, 23, 20, 28, 4, 12, 17, 25, 1, 9, 30, 22, 14, 6, 27, 19, 11, 3, },
/* i = 4 */ { 0, 16, 5, 21, 10, 26, 15, 31, 20, 4, 17, 1, 30, 14, 27, 11, 13, 29, 8, 24, 7, 23, 2, 18, 25, 9, 28, 12, 19, 3, 22, 6, },
/* i = 5 */ { 0, 5, 10, 15, 20, 17, 30, 27, 13, 8, 7, 2, 25, 28, 19, 22, 26, 31, 16, 21, 14, 11, 4, 1, 23, 18, 29, 24, 3, 6, 9, 12, },
/* i = 6 */ { 0, 10, 20, 30, 13, 7, 25, 19, 26, 16, 14, 4, 23, 29, 3, 9, 17, 27, 5, 15, 28, 22, 8, 2, 11, 1, 31, 21, 6, 12, 18, 24, },
/* i = 7 */ { 0, 20, 13, 25, 26, 14, 23, 3, 17, 5, 28, 8, 11, 31, 6, 18, 7, 19, 10, 30, 29, 9, 16, 4, 22, 2, 27, 15, 12, 24, 1, 21, },
/* i = 8 */ { 0, 13, 26, 23, 17, 28, 11, 6, 7, 10, 29, 16, 22, 27, 12, 1, 14, 3, 20, 25, 31, 18, 5, 8, 9, 4, 19, 30, 24, 21, 2, 15, },
/* i = 9 */ { 0, 26, 17, 11, 7, 29, 22, 12, 14, 20, 31, 5, 9, 19, 24, 2, 28, 6, 13, 23, 27, 1, 10, 16, 18, 8, 3, 25, 21, 15, 4, 30, },
/* i = 10 */ { 0, 17, 7, 22, 14, 31, 9, 24, 28, 13, 27, 10, 18, 3, 21, 4, 29, 12, 26, 11, 19, 2, 20, 5, 1, 16, 6, 23, 15, 30, 8, 25, },
/* i = 11 */ { 0, 7, 14, 9, 28, 27, 18, 21, 29, 26, 19, 20, 1, 6, 15, 8, 31, 24, 17, 22, 3, 4, 13, 10, 2, 5, 12, 11, 30, 25, 16, 23, },
/* i = 12 */ { 0, 14, 28, 18, 29, 19, 1, 15, 31, 17, 3, 13, 2, 12, 30, 16, 27, 21, 7, 9, 6, 8, 26, 20, 4, 10, 24, 22, 25, 23, 5, 11, },
/* i = 13 */ { 0, 28, 29, 1, 31, 3, 2, 30, 27, 7, 6, 26, 4, 24, 25, 5, 19, 15, 14, 18, 12, 16, 17, 13, 8, 20, 21, 9, 23, 11, 10, 22, },
/* i = 14 */ { 0, 29, 31, 2, 27, 6, 4, 25, 19, 14, 12, 17, 8, 21, 23, 10, 3, 30, 28, 1, 24, 5, 7, 26, 16, 13, 15, 18, 11, 22, 20, 9, },
/* i = 15 */ { 0, 31, 27, 4, 19, 12, 8, 23, 3, 28, 24, 7, 16, 15, 11, 20, 6, 25, 29, 2, 21, 10, 14, 17, 5, 26, 30, 1, 22, 9, 13, 18, },
/* i = 16 */ { 0, 27, 19, 8, 3, 24, 16, 11, 6, 29, 21, 14, 5, 30, 22, 13, 12, 23, 31, 4, 15, 20, 28, 7, 10, 17, 25, 2, 9, 18, 26, 1, },
/* i = 17 */ { 0, 19, 3, 16, 6, 21, 5, 22, 12, 31, 15, 28, 10, 25, 9, 26, 24, 11, 27, 8, 30, 13, 29, 14, 20, 7, 23, 4, 18, 1, 17, 2, },
/* i = 18 */ { 0, 3, 6, 5, 12, 15, 10, 9, 24, 27, 30, 29, 20, 23, 18, 17, 21, 22, 19, 16, 25, 26, 31, 28, 13, 14, 11, 8, 1, 2, 7, 4, },
/* i = 19 */ { 0, 6, 12, 10, 24, 30, 20, 18, 21, 19, 25, 31, 13, 11, 1, 7, 15, 9, 3, 5, 23, 17, 27, 29, 26, 28, 22, 16, 2, 4, 14, 8, },
/* i = 20 */ { 0, 12, 24, 20, 21, 25, 13, 1, 15, 3, 23, 27, 26, 22, 2, 14, 30, 18, 6, 10, 11, 7, 19, 31, 17, 29, 9, 5, 4, 8, 28, 16, },
/* i = 21 */ { 0, 24, 21, 13, 15, 23, 26, 2, 30, 6, 11, 19, 17, 9, 4, 28, 25, 1, 12, 20, 22, 14, 3, 27, 7, 31, 18, 10, 8, 16, 29, 5, },
/* i = 22 */ { 0, 21, 15, 26, 30, 11, 17, 4, 25, 12, 22, 3, 7, 18, 8, 29, 23, 2, 24, 13, 9, 28, 6, 19, 14, 27, 1, 20, 16, 5, 31, 10, },
/* i = 23 */ { 0, 15, 30, 17, 25, 22, 7, 8, 23, 24, 9, 6, 14, 1, 16, 31, 11, 4, 21, 26, 18, 29, 12, 3, 28, 19, 2, 13, 5, 10, 27, 20, },
/* i = 24 */ { 0, 30, 25, 7, 23, 9, 14, 16, 11, 21, 18, 12, 28, 2, 5, 27, 22, 8, 15, 17, 1, 31, 24, 6, 29, 3, 4, 26, 10, 20, 19, 13, },
/* i = 25 */ { 0, 25, 23, 14, 11, 18, 28, 5, 22, 15, 1, 24, 29, 4, 10, 19, 9, 16, 30, 7, 2, 27, 21, 12, 31, 6, 8, 17, 20, 13, 3, 26, },
/* i = 26 */ { 0, 23, 11, 28, 22, 1, 29, 10, 9, 30, 2, 21, 31, 8, 20, 3, 18, 5, 25, 14, 4, 19, 15, 24, 27, 12, 16, 7, 13, 26, 6, 17, },
/* i = 27 */ { 0, 11, 22, 29, 9, 2, 31, 20, 18, 25, 4, 15, 27, 16, 13, 6, 1, 10, 23, 28, 8, 3, 30, 21, 19, 24, 5, 14, 26, 17, 12, 7, },
/* i = 28 */ { 0, 22, 9, 31, 18, 4, 27, 13, 1, 23, 8, 30, 19, 5, 26, 12, 2, 20, 11, 29, 16, 6, 25, 15, 3, 21, 10, 28, 17, 7, 24, 14, },
/* i = 29 */ { 0, 9, 18, 27, 1, 8, 19, 26, 2, 11, 16, 25, 3, 10, 17, 24, 4, 13, 22, 31, 5, 12, 23, 30, 6, 15, 20, 29, 7, 14, 21, 28, },
/* i = 30 */ { 0, 18, 1, 19, 2, 16, 3, 17, 4, 22, 5, 23, 6, 20, 7, 21, 8, 26, 9, 27, 10, 24, 11, 25, 12, 30, 13, 31, 14, 28, 15, 29, },
/* i = 31 */ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, },
};
/*
* Expands to TWO comma-separated expressions for data column col_num:
* rf_rn[col_num] and the table row rf_qfor[28-(col_num)].  Intended for
* use inside an argument list.
*/
#define RF_Q_DATA_COL(col_num) rf_rn[col_num],rf_qfor[28-(col_num)]
#ifdef KERNEL
RF_ua1024_t rf_qinv[1]; /* don't compile monster table into kernel */
#elif defined(NO_PQ)
RF_ua1024_t rf_qinv[29*29];
#else /* !KERNEL && !NO_PQ */
/* NOTE(review): this branch defines no rf_qinv at all -- presumably the
* generated table lived here in the original tree; confirm before
* building user-level code without NO_PQ. */
#endif /* KERNEL */
#endif /* (RF_INCLUDE_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
#endif /* !_RF__RF_INVERTQ_H_ */

View File

@ -0,0 +1,70 @@
/* $NetBSD: rf_kintf.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* rf_kintf.h
*
* RAIDframe exported kernel interface
*/
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* :
* Log: rf_kintf.h,v
* Revision 1.2 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.1 1996/05/31 18:59:14 jimz
* Initial revision
*
*/
#ifndef _RF__RF_KINTF_H_
#define _RF__RF_KINTF_H_
/*
* Prototypes only.  This header includes nothing but rf_types.h, so
* struct buf, struct uio, dev_t and caddr_t are presumably expected to be
* declared by the includer beforehand -- TODO confirm.
*/
#include "rf_types.h"
/* device entry points */
int rf_boot(void);
int rf_open(dev_t dev, int flag, int fmt);
int rf_close(dev_t dev, int flag, int fmt);
void rf_strategy(struct buf *bp);
void rf_minphys(struct buf *bp);
int rf_read(dev_t dev, struct uio *uio);
int rf_write(dev_t dev, struct uio *uio);
int rf_size(dev_t dev);
int rf_ioctl(dev_t dev, int cmd, caddr_t data, int flag);
/* kernel-side support routines */
void rf_ReconKernelThread(void);
int rf_GetSpareTableFromDaemon(RF_SparetWait_t *req);
caddr_t rf_MapToKernelSpace(struct buf *bp, caddr_t addr);
int rf_BzeroWithRemap(struct buf *bp, char *databuf, int len);
int rf_DoAccessKernel(RF_Raid_t *raidPtr, struct buf *bp,
RF_RaidAccessFlags_t flags, void (*cbFunc)(struct buf *), void *cbArg);
int rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req);
#endif /* _RF__RF_KINTF_H_ */

View File

@ -0,0 +1,719 @@
/* $NetBSD: rf_layout.c,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_layout.c -- driver code dealing with layout and mapping issues
*/
/*
* :
* Log: rf_layout.c,v
* Revision 1.71 1996/08/20 22:41:30 jimz
* add declustered evenodd
*
* Revision 1.70 1996/07/31 16:56:18 jimz
* dataBytesPerStripe, sectorsPerDisk init arch-indep.
*
* Revision 1.69 1996/07/31 15:34:46 jimz
* add EvenOdd
*
* Revision 1.68 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.67 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.66 1996/07/27 18:40:24 jimz
* cleanup sweep
*
* Revision 1.65 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.64 1996/07/15 17:22:18 jimz
* nit-pick code cleanup
* resolve stdlib problems on DEC OSF
*
* Revision 1.63 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.62 1996/07/11 19:08:00 jimz
* generalize reconstruction mechanism
* allow raid1 reconstructs via copyback (done with array
* quiesced, not online, therefore not disk-directed)
*
* Revision 1.61 1996/06/19 22:23:01 jimz
* parity verification is now a layout-configurable thing
* not all layouts currently support it (correctly, anyway)
*
* Revision 1.60 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.59 1996/06/19 14:57:58 jimz
* move layout-specific config parsing hooks into RF_LayoutSW_t
* table in rf_layout.c
*
* Revision 1.58 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.57 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.56 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.55 1996/06/06 18:41:35 jimz
* change interleaved declustering dag selection to an
* interleaved-declustering-specific routine (so we can
* use the partitioned mirror node)
*
* Revision 1.54 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.53 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.52 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.51 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.50 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.49 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.48 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.47 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.46 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.45 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.44 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.43 1996/02/22 16:46:35 amiri
* modified chained declustering to use a seperate DAG selection routine
*
* Revision 1.42 1995/12/01 19:16:11 root
* added copyright info
*
* Revision 1.41 1995/11/28 21:31:02 amiri
* added Interleaved Declustering to switch table
*
* Revision 1.40 1995/11/20 14:35:17 arw
* moved rf_StartThroughputStats in DefaultWrite and DefaultRead
*
* Revision 1.39 1995/11/19 16:28:46 wvcii
* replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
*
* Revision 1.38 1995/11/17 19:00:41 wvcii
* added MapQ entries to switch table
*
* Revision 1.37 1995/11/17 16:58:13 amiri
* Added the Chained Declustering architecture ('C'),
* essentially a variant of mirroring.
*
* Revision 1.36 1995/11/16 16:16:10 amiri
* Added RAID5 with rotated sparing ('R' configuration)
*
* Revision 1.35 1995/11/07 15:41:17 wvcii
* modified state lists: DefaultStates, VSReadStates
* necessary to support new states (LaunchDAGState, ProcessDAGState)
*
* Revision 1.34 1995/10/18 01:23:20 amiri
* added ifndef SIMULATE wrapper around rf_StartThroughputStats()
*
* Revision 1.33 1995/10/13 15:05:46 arw
* added rf_StartThroughputStats to DefaultRead and DefaultWrite
*
* Revision 1.32 1995/10/12 16:04:23 jimz
* added config names to mapsw entires
*
* Revision 1.31 1995/10/04 03:57:48 wvcii
* added raid level 1 to mapsw
*
* Revision 1.30 1995/09/07 01:26:55 jimz
* Achive basic compilation in kernel. Kernel functionality
* is not guaranteed at all, but it'll compile. Mostly. I hope.
*
* Revision 1.29 1995/07/28 21:43:42 robby
* checkin after leaving for Rice. Bye
*
* Revision 1.28 1995/07/26 03:26:14 robby
* *** empty log message ***
*
* Revision 1.27 1995/07/21 19:47:52 rachad
* Added raid 0 /5 with caching architectures
*
* Revision 1.26 1995/07/21 19:29:27 robby
* added virtual striping states
*
* Revision 1.25 1995/07/10 21:41:47 robby
* switched to have my own virtual stripng write function from the cache
*
* Revision 1.24 1995/07/10 20:51:59 robby
* added virtual striping states
*
* Revision 1.23 1995/07/10 16:57:42 robby
* updated alloclistelem struct to the correct struct name
*
* Revision 1.22 1995/07/08 20:06:11 rachad
* *** empty log message ***
*
* Revision 1.21 1995/07/08 19:43:16 cfb
* *** empty log message ***
*
* Revision 1.20 1995/07/08 18:05:39 rachad
* Linked up Claudsons code with the real cache
*
* Revision 1.19 1995/07/06 14:29:36 robby
* added defaults states list to the layout switch
*
* Revision 1.18 1995/06/23 13:40:34 robby
* updeated to prototypes in rf_layout.h
*
*/
#include "rf_types.h"
#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_configure.h"
#include "rf_dag.h"
#include "rf_desc.h"
#include "rf_decluster.h"
#include "rf_pq.h"
#include "rf_declusterPQ.h"
#include "rf_raid0.h"
#include "rf_raid1.h"
#include "rf_raid4.h"
#include "rf_raid5.h"
#include "rf_states.h"
#if RF_INCLUDE_RAID5_RS > 0
#include "rf_raid5_rotatedspare.h"
#endif /* RF_INCLUDE_RAID5_RS > 0 */
#if RF_INCLUDE_CHAINDECLUSTER > 0
#include "rf_chaindecluster.h"
#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
#if RF_INCLUDE_INTERDECLUSTER > 0
#include "rf_interdecluster.h"
#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylogging.h"
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
#if RF_INCLUDE_EVENODD > 0
#include "rf_evenodd.h"
#endif /* RF_INCLUDE_EVENODD > 0 */
#include "rf_general.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_reconbuffer.h"
#include "rf_reconutil.h"
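/***********************************************************************
*
* The layout switch defines all the layouts that are supported.
* Each entry holds, in order: the layout ID character and its
* descriptive name; in non-kernel builds only, the layout-specific
* config parsing hook and its argument; and then, except in utility
* builds: init (configure) routine, map sector, map parity, map Q
* (optional), identify stripe, dag selection, map stripeid to parity
* stripe id (optional), default head separation limit, default number
* of floating recon buffers, number of spare RUs, install spare table,
* submit recon buffer, verify parity, num faults tolerated, access
* state list, special flags.
* There is no longer a per-layout shutdown routine; it has been
* subsumed by shutdown lists.
*
***********************************************************************/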
/*
* Access state list installed in every real entry of the layout switch
* below.  The list ends with rf_LastState.
*/
static RF_AccessState_t DefaultStates[] = {rf_QuiesceState,
rf_IncrAccessesCountState, rf_MapState, rf_LockState, rf_CreateDAGState,
rf_ExecuteDAGState, rf_ProcessDAGState, rf_DecrAccessesCountState,
rf_CleanupState, rf_LastState};
#if defined(__NetBSD__) && !defined(_KERNEL)
/* XXX Gross hack to shut up gcc -- it complains that DefaultStates is not
used when compiling this in userland.  I hate to burst its bubble, but
DefaultStates is used all over the place here in the initialization of
lots of data structures. GO
NOTE(review): when RF_UTILITY > 0 the RF_NU() macro below expands to
nothing, so in such builds the table really does not reference
DefaultStates; this extra reference is what keeps it "used". */
RF_AccessState_t *NothingAtAll = DefaultStates;
#endif
/*
* Flags passed by address as the argument of
* rf_MakeLayoutSpecificDeclustered in the RF_NK2() part of the
* declustered entries below.
*/
#if defined(__NetBSD__) && defined(_KERNEL)
/* XXX Remove static so GCC doesn't complain about these being unused! */
int distSpareYes = 1;
int distSpareNo = 0;
#else
static int distSpareYes = 1;
static int distSpareNo = 0;
#endif
/*
* RF_NK2(a,b): the two table fields that exist only outside the kernel.
* Expands to nothing when KERNEL is defined, otherwise to "a,b," --
* note the trailing comma, which is why no comma follows RF_NK2(...) in
* the table entries.
*/
#ifdef KERNEL
#define RF_NK2(a,b)
#else /* KERNEL */
#define RF_NK2(a,b) a,b,
#endif /* KERNEL */
/*
* RF_NU(...): the sixteen table fields that are dropped in utility
* builds.  Expands to nothing when RF_UTILITY > 0, otherwise to its
* arguments unchanged.
*/
#if RF_UTILITY > 0
#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p)
#else /* RF_UTILITY > 0 */
#define RF_NU(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p
#endif /* RF_UTILITY > 0 */
/*
* The layout switch itself: one entry per supported layout, looked up by
* its ID character in rf_GetLayout().  Most entries are compiled in only
* when the corresponding RF_INCLUDE_* option is > 0.  Fields are
* positional; RF_NK2() and RF_NU() wrap the fields that are omitted in
* kernel and utility builds respectively.  The table must end with the
* entry whose ID is '\0'.
*/
static RF_LayoutSW_t mapsw[] = {
/* parity declustering */
{'T', "Parity declustering",
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
RF_NU(
rf_ConfigureDeclustered,
rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
rf_IdentifyStripeDeclustered,
rf_RaidFiveDagSelect,
rf_MapSIDToPSIDDeclustered,
rf_GetDefaultHeadSepLimitDeclustered,
rf_GetDefaultNumFloatingReconBuffersDeclustered,
NULL, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
0)
},
/* parity declustering with distributed sparing */
{'D', "Distributed sparing parity declustering",
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareYes)
RF_NU(
rf_ConfigureDeclusteredDS,
rf_MapSectorDeclustered, rf_MapParityDeclustered, NULL,
rf_IdentifyStripeDeclustered,
rf_RaidFiveDagSelect,
rf_MapSIDToPSIDDeclustered,
rf_GetDefaultHeadSepLimitDeclustered,
rf_GetDefaultNumFloatingReconBuffersDeclustered,
rf_GetNumSpareRUsDeclustered, rf_InstallSpareTable,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
RF_DISTRIBUTE_SPARE|RF_BD_DECLUSTERED)
},
#if RF_INCLUDE_DECL_PQ > 0
/* declustered P+Q */
{'Q', "Declustered P+Q",
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
RF_NU(
rf_ConfigureDeclusteredPQ,
rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
rf_IdentifyStripeDeclusteredPQ,
rf_PQDagSelect,
rf_MapSIDToPSIDDeclustered,
rf_GetDefaultHeadSepLimitDeclustered,
rf_GetDefaultNumFloatingReconBuffersPQ,
NULL, NULL,
NULL,
rf_VerifyParityBasic,
2,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_DECL_PQ > 0 */
#if RF_INCLUDE_RAID5_RS > 0
/* RAID 5 with rotated sparing */
{'R', "RAID Level 5 rotated sparing",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureRAID5_RS,
rf_MapSectorRAID5_RS, rf_MapParityRAID5_RS, NULL,
rf_IdentifyStripeRAID5_RS,
rf_RaidFiveDagSelect,
rf_MapSIDToPSIDRAID5_RS,
rf_GetDefaultHeadSepLimitRAID5,
rf_GetDefaultNumFloatingReconBuffersRAID5,
rf_GetNumSpareRUsRAID5_RS, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
RF_DISTRIBUTE_SPARE)
},
#endif /* RF_INCLUDE_RAID5_RS > 0 */
#if RF_INCLUDE_CHAINDECLUSTER > 0
/* Chained Declustering */
{'C', "Chained Declustering",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureChainDecluster,
rf_MapSectorChainDecluster, rf_MapParityChainDecluster, NULL,
rf_IdentifyStripeChainDecluster,
rf_RAIDCDagSelect,
rf_MapSIDToPSIDChainDecluster,
NULL,
NULL,
rf_GetNumSpareRUsChainDecluster, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_CHAINDECLUSTER > 0 */
#if RF_INCLUDE_INTERDECLUSTER > 0
/* Interleaved Declustering */
{'I', "Interleaved Declustering",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureInterDecluster,
rf_MapSectorInterDecluster, rf_MapParityInterDecluster, NULL,
rf_IdentifyStripeInterDecluster,
rf_RAIDIDagSelect,
rf_MapSIDToPSIDInterDecluster,
rf_GetDefaultHeadSepLimitInterDecluster,
rf_GetDefaultNumFloatingReconBuffersInterDecluster,
rf_GetNumSpareRUsInterDecluster, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
RF_DISTRIBUTE_SPARE)
},
#endif /* RF_INCLUDE_INTERDECLUSTER > 0 */
#if RF_INCLUDE_RAID0 > 0
/* RAID level 0 */
{'0', "RAID Level 0",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureRAID0,
rf_MapSectorRAID0, rf_MapParityRAID0, NULL,
rf_IdentifyStripeRAID0,
rf_RAID0DagSelect,
rf_MapSIDToPSIDRAID0,
NULL,
NULL,
NULL, NULL,
NULL,
rf_VerifyParityRAID0,
0,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_RAID0 > 0 */
#if RF_INCLUDE_RAID1 > 0
/* RAID level 1 */
{'1', "RAID Level 1",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureRAID1,
rf_MapSectorRAID1, rf_MapParityRAID1, NULL,
rf_IdentifyStripeRAID1,
rf_RAID1DagSelect,
rf_MapSIDToPSIDRAID1,
NULL,
NULL,
NULL, NULL,
rf_SubmitReconBufferRAID1,
rf_VerifyParityRAID1,
1,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_RAID1 > 0 */
#if RF_INCLUDE_RAID4 > 0
/* RAID level 4 */
{'4', "RAID Level 4",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureRAID4,
rf_MapSectorRAID4, rf_MapParityRAID4, NULL,
rf_IdentifyStripeRAID4,
rf_RaidFiveDagSelect,
rf_MapSIDToPSIDRAID4,
rf_GetDefaultHeadSepLimitRAID4,
rf_GetDefaultNumFloatingReconBuffersRAID4,
NULL, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_RAID4 > 0 */
#if RF_INCLUDE_RAID5 > 0
/* RAID level 5 */
{'5', "RAID Level 5",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureRAID5,
rf_MapSectorRAID5, rf_MapParityRAID5, NULL,
rf_IdentifyStripeRAID5,
rf_RaidFiveDagSelect,
rf_MapSIDToPSIDRAID5,
rf_GetDefaultHeadSepLimitRAID5,
rf_GetDefaultNumFloatingReconBuffersRAID5,
NULL, NULL,
rf_SubmitReconBufferBasic,
rf_VerifyParityBasic,
1,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_RAID5 > 0 */
#if RF_INCLUDE_EVENODD > 0
/* Evenodd */
{'E', "EvenOdd",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureEvenOdd,
rf_MapSectorRAID5, rf_MapParityEvenOdd, rf_MapEEvenOdd,
rf_IdentifyStripeEvenOdd,
rf_EODagSelect,
rf_MapSIDToPSIDRAID5,
NULL,
NULL,
NULL, NULL,
NULL, /* no reconstruction, yet */
rf_VerifyParityEvenOdd,
2,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_EVENODD > 0 */
#if RF_INCLUDE_EVENODD > 0
/* Declustered Evenodd */
{'e', "Declustered EvenOdd",
RF_NK2(rf_MakeLayoutSpecificDeclustered, &distSpareNo)
RF_NU(
rf_ConfigureDeclusteredPQ,
rf_MapSectorDeclusteredPQ, rf_MapParityDeclusteredPQ, rf_MapQDeclusteredPQ,
rf_IdentifyStripeDeclusteredPQ,
rf_EODagSelect,
rf_MapSIDToPSIDRAID5,
rf_GetDefaultHeadSepLimitDeclustered,
rf_GetDefaultNumFloatingReconBuffersPQ,
NULL, NULL,
NULL, /* no reconstruction, yet */
rf_VerifyParityEvenOdd,
2,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_EVENODD > 0 */
#if RF_INCLUDE_PARITYLOGGING > 0
/* parity logging */
{'L', "Parity logging",
RF_NK2(rf_MakeLayoutSpecificNULL, NULL)
RF_NU(
rf_ConfigureParityLogging,
rf_MapSectorParityLogging, rf_MapParityParityLogging, NULL,
rf_IdentifyStripeParityLogging,
rf_ParityLoggingDagSelect,
rf_MapSIDToPSIDParityLogging,
rf_GetDefaultHeadSepLimitParityLogging,
rf_GetDefaultNumFloatingReconBuffersParityLogging,
NULL, NULL,
rf_SubmitReconBufferBasic,
NULL,
1,
DefaultStates,
0)
},
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
/* end-of-list marker: rf_GetLayout() stops at the entry whose ID is '\0' */
{ '\0', NULL,
RF_NK2(NULL, NULL)
RF_NU(
NULL,
NULL, NULL, NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL, NULL,
NULL,
NULL,
0,
NULL,
0)
}
};
/*
 * Find the layout switch entry for a parity configuration character.
 *
 * Returns a pointer to the matching entry of mapsw, or NULL when the
 * end-of-list marker is reached without a match.
 */
RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig)
{
    RF_LayoutSW_t *entry = &mapsw[0];

    /* advance until we hit either the requested ID or the '\0' marker */
    while (entry->parityConfig && entry->parityConfig != parityConfig)
        entry++;

    if (!entry->parityConfig)
        return(NULL);

    RF_ASSERT(entry->parityConfig == parityConfig);
    return(entry);
}
#if RF_UTILITY == 0
/*****************************************************************************************
*
* ConfigureLayout --
*
* read the configuration file and set up the RAID layout parameters. After reading
* common params, invokes the layout-specific configuration routine to finish
* the configuration.
*
****************************************************************************************/
/*
 * Returns 0 on success, EINVAL for a zero stripe unit size or an unknown
 * parity configuration, or whatever non-zero value the layout-specific
 * Configure routine returns.
 */
int rf_ConfigureLayout(
    RF_ShutdownList_t **listp,
    RF_Raid_t *raidPtr,
    RF_Config_t *cfgPtr)
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    RF_ParityConfig_t parityConfig;
    RF_LayoutSW_t *p;
    int retval;

    /* copy the layout-independent parameters out of the configuration */
    layoutPtr->sectorsPerStripeUnit = cfgPtr->sectPerSU;
    layoutPtr->SUsPerPU = cfgPtr->SUsPerPU;
    layoutPtr->SUsPerRU = cfgPtr->SUsPerRU;
    parityConfig = cfgPtr->parityConfig;

    /* the stripe unit size is used as a divisor just below */
    if (layoutPtr->sectorsPerStripeUnit == 0) {
        RF_ERRORMSG1("Invalid number of sectors per stripe unit (%ld)\n",
            (long)layoutPtr->sectorsPerStripeUnit);
        return(EINVAL);
    }

    layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;

    p = rf_GetLayout(parityConfig);
    if (p == NULL) {
        RF_ERRORMSG1("Unknown parity configuration '%c'\n", parityConfig);
        return(EINVAL);
    }
    RF_ASSERT(p->parityConfig == parityConfig);
    layoutPtr->map = p;

    /* initialize the specific layout */
    retval = (p->Configure)(listp, raidPtr, cfgPtr);
    if (retval)
        return(retval);

    /*
     * Values derived from what the layout-specific routine set up.
     * sectorsPerDisk is recomputed from whole stripe units, which drops
     * any partial stripe unit at the end of the disk.
     */
    layoutPtr->dataBytesPerStripe = layoutPtr->dataSectorsPerStripe << raidPtr->logBytesPerSector;
    raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;

    /* a non-negative rf_force* value overrides the layout's default */
    if (rf_forceNumFloatingReconBufs >= 0) {
        raidPtr->numFloatingReconBufs = rf_forceNumFloatingReconBufs;
    }
    else {
        raidPtr->numFloatingReconBufs = rf_GetDefaultNumFloatingReconBuffers(raidPtr);
    }

    if (rf_forceHeadSepLimit >= 0) {
        raidPtr->headSepLimit = rf_forceHeadSepLimit;
    }
    else {
        raidPtr->headSepLimit = rf_GetDefaultHeadSepLimit(raidPtr);
    }

    printf("RAIDFRAME: Configure (%s): total number of sectors is %lu (%lu MB)\n",
        layoutPtr->map->configName,
        (unsigned long)raidPtr->totalSectors,
        (unsigned long)(raidPtr->totalSectors / 1024 * (1<<raidPtr->logBytesPerSector) / 1024));

    /* a negative head separation limit is reported as "no limit" */
    if (raidPtr->headSepLimit >= 0) {
        printf("RAIDFRAME(%s): Using %ld floating recon bufs with head sep limit %ld\n",
            layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs, (long)raidPtr->headSepLimit);
    }
    else {
        printf("RAIDFRAME(%s): Using %ld floating recon bufs with no head sep limit\n",
            layoutPtr->map->configName, (long)raidPtr->numFloatingReconBufs);
    }
    return(0);
}
/*
 * Typically there is a 1-1 mapping between stripes and parity stripes.
 * However, the declustering code supports packing multiple stripes into
 * a single parity stripe, so as to increase the size of the reconstruction
 * unit without affecting the size of the stripe unit.  This routine finds
 * the parity stripe identifier associated with a stripe ID.  There is also
 * a rf_RaidAddressToParityStripeID macro in rf_layout.h.
 *
 *   layoutPtr - layout whose SUsPerPU and map->MapSIDToPSID are consulted
 *   stripeID  - stripe to translate
 *   which_ru  - out: set to 0 on the identity path, otherwise handed to the
 *               layout's MapSIDToPSID routine
 *
 * Returns the parity stripe ID.  When SUsPerPU is 1, or the layout supplies
 * no MapSIDToPSID routine, this is stripeID itself.
 */
RF_StripeNum_t rf_MapStripeIDToParityStripeID(
  RF_RaidLayout_t    *layoutPtr,
  RF_StripeNum_t      stripeID,
  RF_ReconUnitNum_t  *which_ru)
{
  RF_StripeNum_t parityStripeID;

  /* quick exit in the common case of SUsPerPU==1 */
  if ((layoutPtr->SUsPerPU == 1) || !layoutPtr->map->MapSIDToPSID) {
    *which_ru = 0;
    return(stripeID);
  }

  /* let the layout-specific routine compute both outputs */
  (layoutPtr->map->MapSIDToPSID)(layoutPtr, stripeID, &parityStripeID, which_ru);
  return(parityStripeID);
}
#endif /* RF_UTILITY == 0 */

View File

@ -0,0 +1,492 @@
/* $NetBSD: rf_layout.h,v 1.1 1998/11/13 04:20:30 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_layout.h -- header file defining layout data structures
*/
/*
* :
* Log: rf_layout.h,v
* Revision 1.50 1996/11/05 21:10:40 jimz
* failed pda generalization
*
* Revision 1.49 1996/07/29 14:05:12 jimz
* fix numPUs/numRUs confusion (everything is now numRUs)
* clean up some commenting, return values
*
* Revision 1.48 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.47 1996/07/18 22:57:14 jimz
* port simulator to AIX
*
* Revision 1.46 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.45 1996/07/11 19:08:00 jimz
* generalize reconstruction mechanism
* allow raid1 reconstructs via copyback (done with array
* quiesced, not online, therefore not disk-directed)
*
* Revision 1.44 1996/06/19 22:23:01 jimz
* parity verification is now a layout-configurable thing
* not all layouts currently support it (correctly, anyway)
*
* Revision 1.43 1996/06/19 17:53:48 jimz
* move GetNumSparePUs, InstallSpareTable ops into layout switch
*
* Revision 1.42 1996/06/19 14:56:48 jimz
* move layout-specific config parsing hooks into RF_LayoutSW_t
* table in rf_layout.c
*
* Revision 1.41 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.40 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.39 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.38 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.37 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.36 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.35 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.34 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.33 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.32 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.31 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.30 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.29 1995/12/01 19:16:19 root
* added copyright info
*
* Revision 1.28 1995/11/28 21:26:49 amiri
* defined a declustering flag RF_BD_DECLUSTERED
*
* Revision 1.27 1995/11/17 19:00:59 wvcii
* created MapQ entry in switch table
* added prototyping to MapParity
*
* Revision 1.26 1995/11/07 15:40:27 wvcii
* changed prototype of SeclectionFunc in mapsw
* function no longer returns numHdrSucc, numTermAnt
*
* Revision 1.25 1995/10/12 20:57:08 arw
* added lots of comments
*
* Revision 1.24 1995/10/12 16:04:08 jimz
* added config name to mapsw
*
* Revision 1.23 1995/07/26 03:28:31 robby
* intermediary checkin
*
* Revision 1.22 1995/07/10 20:51:08 robby
* added to the asm info for the virtual striping locks
*
* Revision 1.21 1995/07/10 16:57:47 robby
* updated alloclistelem struct to the correct struct name
*
* Revision 1.20 1995/07/08 20:06:11 rachad
* *** empty log message ***
*
* Revision 1.19 1995/07/08 18:05:39 rachad
* Linked up Claudsons code with the real cache
*
* Revision 1.18 1995/07/06 14:29:36 robby
* added defaults states list to the layout switch
*
* Revision 1.17 1995/06/23 13:40:14 robby
* updeated to prototypes in rf_layout.h
*
* Revision 1.16 1995/06/08 22:11:03 holland
* bug fixes related to mutiple-row arrays
*
* Revision 1.15 1995/05/24 21:43:23 wvcii
* added field numParityLogCol to RaidLayout
*
* Revision 1.14 95/05/02 22:46:53 holland
* minor code cleanups.
*
* Revision 1.13 1995/05/02 12:48:01 holland
* eliminated some unused code.
*
* Revision 1.12 1995/05/01 13:28:00 holland
* parity range locks, locking disk requests, recon+parityscan in kernel, etc.
*
* Revision 1.11 1995/03/15 20:01:17 holland
* added REMAP and DONT_REMAP
*
* Revision 1.10 1995/03/09 19:54:11 rachad
* Added suport for threadless simulator
*
* Revision 1.9 1995/03/03 21:48:58 holland
* minor changes.
*
* Revision 1.8 1995/03/01 20:25:48 holland
* kernelization changes
*
* Revision 1.7 1995/02/03 22:31:36 holland
* many changes related to kernelization
*
* Revision 1.6 1995/01/30 14:53:46 holland
* extensive changes related to making DoIO non-blocking
*
* Revision 1.5 1995/01/24 23:58:46 holland
* multi-way recon XOR, plus various small changes
*
* Revision 1.4 1995/01/04 19:28:35 holland
* corrected comments around mapsw
*
* Revision 1.3 1994/11/28 22:15:45 danner
* Added type field to the physdiskaddr struct.
*
*/
#ifndef _RF__RF_LAYOUT_H_
#define _RF__RF_LAYOUT_H_
#include "rf_types.h"
#include "rf_archs.h"
#include "rf_alloclist.h"
/*****************************************************************************************
*
* This structure identifies all layout-specific operations and parameters.
*
* One entry per supported layout lives in the mapsw[] table in rf_layout.c.
* rf_GetLayout() selects an entry by its parityConfig code, and
* rf_ConfigureLayout() stores the selected entry in RF_RaidLayout_s.map.
*
* NOTE: the table entries are written as positional initializers, so the
* order of the members below must not be changed without updating the table.
*
****************************************************************************************/
typedef struct RF_LayoutSW_s {
/* code identifying the layout (e.g. 'L' for parity logging); zero marks the end of the table */
RF_ParityConfig_t parityConfig;
/* human-readable layout name, printed in the configuration messages */
char *configName;
#ifndef KERNEL
/* layout-specific parsing */
int (*MakeLayoutSpecific)(FILE *fp, RF_Config_t *cfgPtr, void *arg);
/* NOTE(review): presumably handed to MakeLayoutSpecific as its 'arg' parameter -- confirm against the caller */
void *makeLayoutSpecificArg;
#endif /* !KERNEL */
#if RF_UTILITY == 0
/* initialization routine; rf_ConfigureLayout() treats a nonzero return as failure */
int (*Configure)(RF_ShutdownList_t **shutdownListp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
/* routine to map RAID sector address -> physical (row, col, offset) */
void (*MapSector)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* routine to map RAID sector address -> physical (r,c,o) of parity unit */
void (*MapParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* routine to map RAID sector address -> physical (r,c,o) of Q unit (NULL in some table entries) */
void (*MapQ)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, RF_RowCol_t *row,
RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap);
/* routine to identify the disks comprising a stripe */
void (*IdentifyStripe)(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
/* routine to select a dag */
void (*SelectionFunc)(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_AccessStripeMap_t *asmap,
RF_VoidFuncPtr *);
/*
* Disabled text below: it appears to spell out the type of SelectionFunc's
* last parameter.  NOTE(review): it has an unbalanced ')' and would not
* compile if re-enabled.
*/
#if 0
void (**createFunc)(RF_Raid_t *,
RF_AccessStripeMap_t *,
RF_DagHeader_t *, void *,
RF_RaidAccessFlags_t,
RF_AllocListElem_t *));
#endif
/*
* map a stripe ID to a parity stripe ID. This is typically the identity mapping.
* May be NULL: rf_MapStripeIDToParityStripeID() then uses the identity mapping.
*/
void (*MapSIDToPSID)(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru);
/* get default head separation limit (may be NULL) */
RF_HeadSepLimit_t (*GetDefaultHeadSepLimit)(RF_Raid_t *raidPtr);
/* get default num recon buffers (may be NULL) */
int (*GetDefaultNumFloatingReconBuffers)(RF_Raid_t *raidPtr);
/* get number of spare recon units (may be NULL) */
RF_ReconUnitCount_t (*GetNumSpareRUs)(RF_Raid_t *raidPtr);
/* spare table installation (may be NULL) */
int (*InstallSpareTable)(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol);
/* recon buffer submission function (NULL in table entries for layouts without reconstruction) */
int (*SubmitReconBuffer)(RF_ReconBuffer_t *rbuf, int keep_it,
int use_committed);
/*
* verify that parity information for a stripe is correct
* see rf_parityscan.h for return vals
*/
int (*VerifyParity)(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
RF_PhysDiskAddr_t *parityPDA, int correct_it, RF_RaidAccessFlags_t flags);
/*
* number of faults tolerated by this mapping; rf_MapAccess() reserves this
* many extra physical disk addresses per stripe for parity
*/
int faultsTolerated;
/* states to step through in an access. Must end with "LastState".
* The default is DefaultStates in rf_layout.c */
RF_AccessState_t *states;
/* layout flags (see the RF_NO_STRIPE_LOCKS, RF_DISTRIBUTE_SPARE, RF_BD_DECLUSTERED defines) */
RF_AccessStripeMapFlags_t flags;
#endif /* RF_UTILITY == 0 */
} RF_LayoutSW_t;
/*
* Values for the 'remap' argument of rf_MapAccess() (1 => remap addresses to
* spare space).  NOTE(review): presumably also the values expected by the
* 'remap' parameter of the MapSector/MapParity/MapQ layout routines -- confirm.
* RF_REMAP enables remapping to spare location under dist sparing.
*/
#define RF_REMAP 1
#define RF_DONT_REMAP 0
/*
* Flag values for RF_AccessStripeMapFlags_t (the type of RF_LayoutSW_t.flags)
*/
#define RF_NO_STRIPE_LOCKS 0x0001 /* suppress stripe locks */
#define RF_DISTRIBUTE_SPARE 0x0002 /* distribute spare space in archs that support it */
#define RF_BD_DECLUSTERED 0x0004 /* declustering uses block designs */
/*************************************************************************
*
* this structure forms the layout component of the main Raid
* structure. It describes everything needed to define and perform
* the mapping of logical RAID addresses <-> physical disk addresses.
*
* The three configuration parameters, stripeUnitsPerDisk, dataBytesPerStripe
* and map are filled in by rf_ConfigureLayout().  NOTE(review): the remaining
* members are presumably set by the layout-specific Configure routine --
* confirm against the individual layouts.
*
*************************************************************************/
struct RF_RaidLayout_s {
/* configuration parameters */
RF_SectorCount_t sectorsPerStripeUnit; /* number of sectors in one stripe unit */
RF_StripeCount_t SUsPerPU; /* stripe units per parity unit */
RF_StripeCount_t SUsPerRU; /* stripe units per reconstruction unit */
/* redundant-but-useful info computed from the above, used in all layouts */
RF_StripeCount_t numStripe; /* total number of stripes in the array */
RF_SectorCount_t dataSectorsPerStripe; /* divisor/modulus used by the stripe-boundary macros in this header */
RF_StripeCount_t dataStripeUnitsPerDisk;
u_int bytesPerStripeUnit;
u_int dataBytesPerStripe; /* dataSectorsPerStripe << logBytesPerSector */
RF_StripeCount_t numDataCol; /* number of SUs of data per stripe (name here is a la RAID4) */
RF_StripeCount_t numParityCol; /* number of SUs of parity per stripe. Always 1 for now */
RF_StripeCount_t numParityLogCol; /* number of SUs of parity log per stripe. Always 1 for now */
RF_StripeCount_t stripeUnitsPerDisk; /* initially sectorsPerDisk / sectorsPerStripeUnit */
RF_LayoutSW_t *map; /* ptr to struct holding mapping fns and information */
void *layoutSpecificInfo; /* ptr to a structure holding layout-specific params */
};
/*****************************************************************************************
*
* The mapping code returns a pointer to a list of AccessStripeMap structures, which
* describe all the mapping information about an access. The list contains one
* AccessStripeMap structure per stripe touched by the access. Each element in the list
* contains a stripe identifier and a pointer to a list of PhysDiskAddr structures. Each
* element in this latter list describes the physical location of a stripe unit accessed
* within the corresponding stripe.
*
****************************************************************************************/
/* values for the 'type' member of RF_PhysDiskAddr_s */
#define RF_PDA_TYPE_DATA 0
#define RF_PDA_TYPE_PARITY 1
#define RF_PDA_TYPE_Q 2
/* one physical disk extent: which disk, where on it, how long, and the buffer involved */
struct RF_PhysDiskAddr_s {
RF_RowCol_t row,col; /* disk identifier */
RF_SectorNum_t startSector; /* sector offset into the disk */
RF_SectorCount_t numSector; /* number of sectors accessed */
int type; /* used by higher levels: currently, data, parity, or q (RF_PDA_TYPE_*) */
caddr_t bufPtr; /* pointer to buffer supplying/receiving data */
RF_RaidAddr_t raidAddress; /* raid address corresponding to this physical disk address */
RF_PhysDiskAddr_t *next; /* next element of the list this address belongs to */
};
/* capacity of the failedPDAs[] array in RF_AccessStripeMap_s */
#define RF_MAX_FAILED_PDA RF_MAXCOL
/* mapping information for the portion of one access that falls within a single stripe */
struct RF_AccessStripeMap_s {
RF_StripeNum_t stripeID; /* the stripe index */
RF_RaidAddr_t raidAddress; /* the starting raid address within this stripe */
RF_RaidAddr_t endRaidAddress; /* raid address one sector past the end of the access */
RF_SectorCount_t totalSectorsAccessed; /* total num sectors identified in physInfo list */
RF_StripeCount_t numStripeUnitsAccessed; /* total num elements in physInfo list */
int numDataFailed; /* number of failed data disks accessed */
int numParityFailed; /* number of failed parity disks accessed (0 or 1) */
int numQFailed; /* number of failed Q units accessed (0 or 1) */
RF_AccessStripeMapFlags_t flags; /* various flags (RF_ASM_* values defined after this struct) */
/* the #if 0 branch is the older two-pointer form; the array form below is the one compiled */
#if 0
RF_PhysDiskAddr_t *failedPDA; /* points to the PDA that has failed */
RF_PhysDiskAddr_t *failedPDAtwo; /* points to the second PDA that has failed, if any */
#else
int numFailedPDAs; /* number of failed phys addrs */
RF_PhysDiskAddr_t *failedPDAs[RF_MAX_FAILED_PDA]; /* array of failed phys addrs */
#endif
RF_PhysDiskAddr_t *physInfo; /* a list of PhysDiskAddr structs */
RF_PhysDiskAddr_t *parityInfo; /* list of physical addrs for the parity (P of P + Q ) */
RF_PhysDiskAddr_t *qInfo; /* list of physical addrs for the Q of P + Q */
RF_LockReqDesc_t lockReqDesc; /* used for stripe locking */
RF_RowCol_t origRow; /* the original row: we may redirect the acc to a different row */
RF_AccessStripeMap_t *next; /* map for the next stripe touched by the same access */
};
/* flag values for RF_AccessStripeMap_s.flags */
#define RF_ASM_REDIR_LARGE_WRITE 0x00000001 /* allows large-write creation code to redirect failed accs */
#define RF_ASM_BAILOUT_DAG_USED 0x00000002 /* allows us to detect recursive calls to the bailout write dag */
#define RF_ASM_FLAGS_LOCK_TRIED 0x00000004 /* we've acquired the lock on the first parity range in this parity stripe */
#define RF_ASM_FLAGS_LOCK_TRIED2 0x00000008 /* we've acquired the lock on the 2nd parity range in this parity stripe */
#define RF_ASM_FLAGS_FORCE_TRIED 0x00000010 /* we've done the force-recon call on this parity stripe */
#define RF_ASM_FLAGS_RECON_BLOCKED 0x00000020 /* we blocked recon => we must unblock it later */
/* head of the per-access list of stripe maps; this is the type rf_MapAccess() returns */
struct RF_AccessStripeMapHeader_s {
RF_StripeCount_t numStripes; /* total number of stripes touched by this acc */
RF_AccessStripeMap_t *stripeMap; /* pointer to the actual map. Also used for making lists */
RF_AccessStripeMapHeader_t *next;
};
/*****************************************************************************************
*
* various routines mapping addresses in the RAID address space. These work across
* all layouts. DON'T PUT ANY LAYOUT-SPECIFIC CODE HERE.
*
* These are macros, and most of them expand _layoutPtr_ more than once, so
* pass only side-effect-free expressions.  The divisions and remainders use the
* layout's sectorsPerStripeUnit, numDataCol and dataSectorsPerStripe members,
* which must therefore be nonzero.
*
****************************************************************************************/
/* return the identifier of the stripe containing the given address */
#define rf_RaidAddressToStripeID(_layoutPtr_, _addr_) \
( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) / (_layoutPtr_)->numDataCol )
/* return the raid address of the start of the indicated stripe ID */
#define rf_StripeIDToRaidAddress(_layoutPtr_, _sid_) \
( ((_sid_) * (_layoutPtr_)->sectorsPerStripeUnit) * (_layoutPtr_)->numDataCol )
/* return the identifier of the stripe containing the given stripe unit id */
#define rf_StripeUnitIDToStripeID(_layoutPtr_, _addr_) \
( (_addr_) / (_layoutPtr_)->numDataCol )
/* return the identifier of the stripe unit containing the given address */
#define rf_RaidAddressToStripeUnitID(_layoutPtr_, _addr_) \
( ((_addr_) / (_layoutPtr_)->sectorsPerStripeUnit) )
/*
* return the RAID address of next stripe boundary beyond the given address
* (for an address already on a boundary the result is the following boundary)
*/
#define rf_RaidAddressOfNextStripeBoundary(_layoutPtr_, _addr_) \
( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+1) * (_layoutPtr_)->dataSectorsPerStripe )
/* return the RAID address of the start of the stripe containing the given address */
#define rf_RaidAddressOfPrevStripeBoundary(_layoutPtr_, _addr_) \
( (((_addr_)/(_layoutPtr_)->dataSectorsPerStripe)+0) * (_layoutPtr_)->dataSectorsPerStripe )
/*
* return the RAID address of next stripe unit boundary beyond the given address
* (for an address already on a boundary the result is the following boundary)
*/
#define rf_RaidAddressOfNextStripeUnitBoundary(_layoutPtr_, _addr_) \
( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+1L)*(_layoutPtr_)->sectorsPerStripeUnit )
/* return the RAID address of the start of the stripe unit containing RAID address _addr_ */
#define rf_RaidAddressOfPrevStripeUnitBoundary(_layoutPtr_, _addr_) \
( (((_addr_)/(_layoutPtr_)->sectorsPerStripeUnit)+0)*(_layoutPtr_)->sectorsPerStripeUnit )
/* returns the offset into the stripe. used by RaidAddressStripeAligned */
#define rf_RaidAddressStripeOffset(_layoutPtr_, _addr_) \
( (_addr_) % ((_layoutPtr_)->dataSectorsPerStripe) )
/* returns the offset into the stripe unit. */
#define rf_StripeUnitOffset(_layoutPtr_, _addr_) \
( (_addr_) % ((_layoutPtr_)->sectorsPerStripeUnit) )
/* returns nonzero if the given RAID address is stripe-aligned */
#define rf_RaidAddressStripeAligned( __layoutPtr__, __addr__ ) \
( rf_RaidAddressStripeOffset(__layoutPtr__, __addr__) == 0 )
/* returns nonzero if the given address is stripe-unit aligned */
#define rf_StripeUnitAligned( __layoutPtr__, __addr__ ) \
( rf_StripeUnitOffset(__layoutPtr__, __addr__) == 0 )
/*
* convert an address expressed in RAID blocks to/from an addr expressed in bytes.
* The shift is performed in the type of _addr_, so the caller must pass a type
* wide enough to hold the result.
*/
#define rf_RaidAddressToByte(_raidPtr_, _addr_) \
( (_addr_) << ( (_raidPtr_)->logBytesPerSector ) )
#define rf_ByteToRaidAddress(_raidPtr_, _addr_) \
( (_addr_) >> ( (_raidPtr_)->logBytesPerSector ) )
/* convert a raid address to/from a parity stripe ID. Conversion to raid address is easy,
* since we're asking for the address of the first sector in the parity stripe. Conversion to a
* parity stripe ID is more complex, since stripes are not contiguously allocated in
* parity stripes.
*
* rf_RaidAddressToParityStripeID expands to a call of rf_MapStripeIDToParityStripeID(),
* so _ru_num_ must be a pointer to an RF_ReconUnitNum_t that receives the
* reconstruction unit number.
*/
#define rf_RaidAddressToParityStripeID(_layoutPtr_, _addr_, _ru_num_) \
rf_MapStripeIDToParityStripeID( (_layoutPtr_), rf_RaidAddressToStripeID( (_layoutPtr_), (_addr_) ), (_ru_num_) )
#define rf_ParityStripeIDToRaidAddress(_layoutPtr_, _psid_) \
( (_psid_) * (_layoutPtr_)->SUsPerPU * (_layoutPtr_)->numDataCol * (_layoutPtr_)->sectorsPerStripeUnit )
/* look up the layout switch entry for a parity configuration; NULL if there is none */
RF_LayoutSW_t *rf_GetLayout(RF_ParityConfig_t parityConfig);
/*
* set up raidPtr->Layout from cfgPtr; returns 0 on success, nonzero on failure.
* The definition in rf_layout.c is compiled only when RF_UTILITY == 0.
*/
int rf_ConfigureLayout(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
/*
* translate a stripe ID into a parity stripe ID, storing the reconstruction
* unit number through which_ru.  The definition in rf_layout.c is compiled
* only when RF_UTILITY == 0.
*/
RF_StripeNum_t rf_MapStripeIDToParityStripeID(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID, RF_ReconUnitNum_t *which_ru);
#endif /* !_RF__RF_LAYOUT_H_ */

975
sys/dev/raidframe/rf_map.c Normal file
View File

@ -0,0 +1,975 @@
/* $NetBSD: rf_map.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/**************************************************************************
*
* map.c -- main code for mapping RAID addresses to physical disk addresses
*
**************************************************************************/
/*
* :
* Log: rf_map.c,v
* Revision 1.53 1996/11/05 21:10:40 jimz
* failed pda generalization
*
* Revision 1.52 1996/08/20 19:58:39 jimz
* initialize numParityFailed and numQFailed to 0 in MarkFailuresInASMList
*
* Revision 1.51 1996/08/19 22:26:31 jimz
* add Chang's bugfixes for double-disk failures in MarkFailuresInASMList
*
* Revision 1.50 1996/08/19 21:38:06 jimz
* stripeOffset was uninitialized in CheckStripeForFailures
*
* Revision 1.49 1996/07/31 15:34:56 jimz
* evenodd changes; bugfixes for double-degraded archs, generalize
* some formerly PQ-only functions
*
* Revision 1.48 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.47 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.46 1996/06/10 12:50:57 jimz
* Add counters to freelists to track number of allocations, frees,
* grows, max size, etc. Adjust a couple sets of PRIME params based
* on the results.
*
* Revision 1.45 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.44 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.43 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.42 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.41 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.40 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.39 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.38 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.37 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.36 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.35 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.34 1996/05/20 16:14:45 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.33 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.32 1996/05/17 00:51:47 jimz
* reformat for readability
*
* Revision 1.31 1996/05/16 23:06:26 jimz
* convert asmhdr to use RF_FREELIST stuff
*
* Revision 1.30 1996/05/16 19:09:42 jimz
* grow init asm freelist to 32
*
* Revision 1.29 1996/05/16 15:27:55 jimz
* prime freelist pumps for asm and pda lists
*
* Revision 1.28 1996/05/02 14:58:35 jimz
* legibility cleanup
*
* Revision 1.27 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.26 1995/12/01 19:25:06 root
* added copyright info
*
* Revision 1.25 1995/11/17 19:01:57 wvcii
* added call to MapQ in two fault tolerant case
*
* Revision 1.24 1995/11/17 15:10:53 wvcii
* fixed bug in ASMCheckStatus - ASSERT was using disk sector addresses
* rather than raidAddress
*
* Revision 1.23 1995/07/26 03:26:51 robby
* map the allocation and freeing routines for some stuff non-static
*
* Revision 1.22 1995/06/28 09:33:45 holland
* bug fixes related to dist sparing and multiple-row arrays
*
* Revision 1.21 1995/06/28 04:51:08 holland
* added some asserts against zero-length accesses
*
* Revision 1.20 1995/06/23 13:40:06 robby
* updeated to prototypes in rf_layout.h
*
*/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_raid.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_freelist.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
/*
* Forward declarations of file-local helpers.
* NOTE(review): judging by the signatures, each presumably releases a chain of
* 'count' elements running from 'start' to 'end' -- confirm against the
* definitions.
*/
static void rf_FreePDAList(RF_PhysDiskAddr_t *start, RF_PhysDiskAddr_t *end, int count);
static void rf_FreeASMList(RF_AccessStripeMap_t *start, RF_AccessStripeMap_t *end,
int count);
/*****************************************************************************************
*
* MapAccess -- main 1st order mapping routine.
*
* Maps an access in the RAID address space to the corresponding set of physical disk
* addresses. The result is returned as a list of AccessStripeMap structures, one per
* stripe accessed. Each ASM structure contains a pointer to a list of PhysDiskAddr
* structures, which describe the physical locations touched by the user access. Note
* that this routine returns only static mapping information, i.e. the list of physical
* addresses returned does not necessarily identify the set of physical locations that
* will actually be read or written.
*
* The routine also maps the parity. The physical disk location returned always
* indicates the entire parity unit, even when only a subset of it is being accessed.
* This is because an access that is not stripe unit aligned but that spans a stripe
* unit boundary may require access to two distinct portions of the parity unit, and we
* can't yet tell which portion(s) we'll actually need. We leave it up to the algorithm
* selection code to decide what subset of the parity unit to access.
*
* Note that addresses in the RAID address space must always be maintained as
* longs, instead of ints.
*
* This routine returns NULL if numBlocks is 0
*
****************************************************************************************/
RF_AccessStripeMapHeader_t *rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap)
RF_Raid_t *raidPtr;
RF_RaidAddr_t raidAddress; /* starting address in RAID address space */
RF_SectorCount_t numBlocks; /* number of blocks in RAID address space to access */
caddr_t buffer; /* buffer to supply/receive data */
int remap; /* 1 => remap addresses to spare space */
{
RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
RF_AccessStripeMapHeader_t *asm_hdr = NULL;
RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
int faultsTolerated = layoutPtr->map->faultsTolerated;
RF_RaidAddr_t startAddress = raidAddress; /* we'll change raidAddress along the way */
RF_RaidAddr_t endAddress = raidAddress + numBlocks;
RF_RaidDisk_t **disks = raidPtr->Disks;
RF_PhysDiskAddr_t *pda_p, *pda_q;
RF_StripeCount_t numStripes = 0;
RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress;
RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
RF_StripeCount_t totStripes;
RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
RF_AccessStripeMap_t *asmList, *t_asm;
RF_PhysDiskAddr_t *pdaList, *t_pda;
/* allocate all the ASMs and PDAs up front */
lastRaidAddr = raidAddress + numBlocks - 1 ;
stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
totStripes = lastSID - stripeID + 1;
SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
asmList = rf_AllocASMList(totStripes);
pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) per stripe for parity */
if (raidAddress+numBlocks > raidPtr->totalSectors) {
RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n",
(int)raidAddress);
return(NULL);
}
if (rf_mapDebug)
rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
for (; raidAddress < endAddress; ) {
/* make the next stripe structure */
RF_ASSERT(asmList);
t_asm = asmList;
asmList = asmList->next;
bzero((char *)t_asm, sizeof(RF_AccessStripeMap_t));
if (!asm_p)
asm_list = asm_p = t_asm;
else {
asm_p->next = t_asm;
asm_p = asm_p->next;
}
numStripes++;
/* map SUs from current location to the end of the stripe */
asm_p->stripeID = /*rf_RaidAddressToStripeID(layoutPtr, raidAddress)*/ stripeID++;
stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
stripeEndAddress = RF_MIN(endAddress,stripeRealEndAddress );
asm_p->raidAddress = raidAddress;
asm_p->endRaidAddress = stripeEndAddress;
/* map each stripe unit in the stripe */
pda_p = NULL;
startAddrWithinStripe = raidAddress; /* Raid addr of start of portion of access that is within this stripe */
for (; raidAddress < stripeEndAddress; ) {
RF_ASSERT(pdaList);
t_pda = pdaList;
pdaList = pdaList->next;
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
if (!pda_p)
asm_p->physInfo = pda_p = t_pda;
else {
pda_p->next = t_pda;
pda_p = pda_p->next;
}
pda_p->type = RF_PDA_TYPE_DATA;
(layoutPtr->map->MapSector)(raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
/* mark any failures we find. failedPDA is don't-care if there is more than one failure */
pda_p->raidAddress = raidAddress; /* the RAID address corresponding to this physical disk address */
nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress);
pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress;
RF_ASSERT(pda_p->numSector != 0);
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,0);
pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress));
asm_p->totalSectorsAccessed += pda_p->numSector;
asm_p->numStripeUnitsAccessed++;
asm_p->origRow = pda_p->row; /* redundant but harmless to do this in every loop iteration */
raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
}
/* Map the parity. At this stage, the startSector and numSector fields
* for the parity unit are always set to indicate the entire parity unit.
* We may modify this after mapping the data portion.
*/
switch (faultsTolerated)
{
case 0:
break;
case 1: /* single fault tolerant */
RF_ASSERT(pdaList);
t_pda = pdaList;
pdaList = pdaList->next;
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
pda_p = asm_p->parityInfo = t_pda;
pda_p->type = RF_PDA_TYPE_PARITY;
(layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
&(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
/* raidAddr may be needed to find unit to redirect to */
pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1);
rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
break;
case 2: /* two fault tolerant */
RF_ASSERT(pdaList && pdaList->next);
t_pda = pdaList;
pdaList = pdaList->next;
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
pda_p = asm_p->parityInfo = t_pda;
pda_p->type = RF_PDA_TYPE_PARITY;
t_pda = pdaList;
pdaList = pdaList->next;
bzero((char *)t_pda, sizeof(RF_PhysDiskAddr_t));
pda_q = asm_p->qInfo = t_pda;
pda_q->type = RF_PDA_TYPE_Q;
(layoutPtr->map->MapParity)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
&(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap);
(layoutPtr->map->MapQ)(raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe),
&(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap);
pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
/* raidAddr may be needed to find unit to redirect to */
pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe);
/* failure mode stuff */
rf_ASMCheckStatus(raidPtr,pda_p,asm_p,disks,1);
rf_ASMCheckStatus(raidPtr,pda_q,asm_p,disks,1);
rf_ASMParityAdjust(asm_p->parityInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
rf_ASMParityAdjust(asm_p->qInfo,startAddrWithinStripe,endAddress,layoutPtr,asm_p);
break;
}
}
RF_ASSERT(asmList == NULL && pdaList == NULL);
/* make the header structure */
asm_hdr = rf_AllocAccessStripeMapHeader();
RF_ASSERT(numStripes == totStripes);
asm_hdr->numStripes = numStripes;
asm_hdr->stripeMap = asm_list;
if (rf_mapDebug)
rf_PrintAccessStripeMap(asm_hdr);
return(asm_hdr);
}
/*****************************************************************************************
 * Walk every stripe in an ASM list and rebuild its failure bookkeeping from the
 * current disk status table: the data/parity/Q failure counters are reset, the
 * failedPDAs array is cleared, and each PDA that sits on a dead disk is counted and
 * appended to failedPDAs (data units first, then parity, then Q).
 *
 * It's called only when a disk failure causes an in-flight DAG to fail.
 * The parity may consist of two components, but only the first parity component
 * (asmap->parityInfo itself) is examined here; the rest of the code is relied on to
 * do the right thing with this.
 ****************************************************************************************/
void
rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
{
	RF_RaidDisk_t **disks = raidPtr->Disks;
	RF_AccessStripeMap_t *stripe;
	RF_PhysDiskAddr_t *unit;

	stripe = asm_h->stripeMap;
	while (stripe != NULL) {
		/* start from a clean slate for this stripe */
		stripe->numDataFailed = 0;
		stripe->numParityFailed = 0;
		stripe->numQFailed = 0;
		stripe->numFailedPDAs = 0;
		bzero((char *)stripe->failedPDAs,
		    RF_MAX_FAILED_PDA*sizeof(RF_PhysDiskAddr_t *));

		/* data units */
		unit = stripe->physInfo;
		while (unit != NULL) {
			if (RF_DEAD_DISK(disks[unit->row][unit->col].status)) {
				printf("DEAD DISK BOGUSLY DETECTED!!\n");
				stripe->numDataFailed++;
				stripe->failedPDAs[stripe->numFailedPDAs++] = unit;
			}
			unit = unit->next;
		}

		/* first parity component */
		unit = stripe->parityInfo;
		if (unit != NULL && RF_DEAD_DISK(disks[unit->row][unit->col].status)) {
			stripe->numParityFailed++;
			stripe->failedPDAs[stripe->numFailedPDAs++] = unit;
		}

		/* Q component */
		unit = stripe->qInfo;
		if (unit != NULL && RF_DEAD_DISK(disks[unit->row][unit->col].status)) {
			stripe->numQFailed++;
			stripe->failedPDAs[stripe->numFailedPDAs++] = unit;
		}

		stripe = stripe->next;
	}
}
/*****************************************************************************************
 *
 * DuplicateASM -- duplicates an ASM and returns the new one
 *
 * The ASM structure is copied bytewise, then the copy gets its own freshly
 * allocated physInfo and parityInfo lists, is detached from any list (next = NULL),
 * and has numFailedPDAs reset to 0.  failedPDAs[0] in the copy is pointed at the
 * duplicate of whichever PDA the original's failedPDAs[0] referred to, or left NULL
 * if that PDA is on neither list.
 *
 * NOTE(review): qInfo is not duplicated; after the bytewise copy the new ASM carries
 * the same qInfo pointer as the original -- confirm this sharing is intended.
 *
 ****************************************************************************************/
RF_AccessStripeMap_t *
rf_DuplicateASM(RF_AccessStripeMap_t *asmap)
{
	RF_AccessStripeMap_t *dup;
	RF_PhysDiskAddr_t *src, *copy;
	RF_PhysDiskAddr_t **link;	/* where the next duplicated PDA gets attached */

	dup = rf_AllocAccessStripeMapComponent();
	bcopy((char *)asmap, (char *)dup, sizeof(RF_AccessStripeMap_t));
	dup->numFailedPDAs = 0;	/* ??? */
	dup->failedPDAs[0] = NULL;
	dup->physInfo = NULL;
	dup->parityInfo = NULL;
	dup->next = NULL;

	/* copy the physInfo list */
	link = &dup->physInfo;
	for (src = asmap->physInfo; src != NULL; src = src->next) {
		copy = rf_AllocPhysDiskAddr();
		bcopy((char *)src, (char *)copy, sizeof(RF_PhysDiskAddr_t));
		copy->next = NULL;
		*link = copy;
		link = &copy->next;
		if (src == asmap->failedPDAs[0])
			dup->failedPDAs[0] = copy;
	}

	/* copy the parityInfo list */
	link = &dup->parityInfo;
	for (src = asmap->parityInfo; src != NULL; src = src->next) {
		copy = rf_AllocPhysDiskAddr();
		bcopy((char *)src, (char *)copy, sizeof(RF_PhysDiskAddr_t));
		copy->next = NULL;
		*link = copy;
		link = &copy->next;
		if (src == asmap->failedPDAs[0])
			dup->failedPDAs[0] = copy;
	}

	return(dup);
}
/*****************************************************************************************
 *
 * DuplicatePDA -- duplicates a PDA and returns the new one
 *
 * The copy is bytewise, so the duplicate's `next' field still holds whatever the
 * original's `next' held.
 *
 ****************************************************************************************/
RF_PhysDiskAddr_t *
rf_DuplicatePDA(RF_PhysDiskAddr_t *pda)
{
	RF_PhysDiskAddr_t *copy = rf_AllocPhysDiskAddr();

	bcopy((char *)pda, (char *)copy, sizeof(RF_PhysDiskAddr_t));
	return(copy);
}
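/*****************************************************************************************
*
* routines to allocate and free list elements. The single-element allocation routines
* (rf_AllocAccessStripeMapHeader, rf_AllocPhysDiskAddr, rf_AllocAccessStripeMapComponent)
* zero the structure before returning it. The list allocation routines (rf_AllocPDAList,
* rf_AllocASMList) do not zero anything themselves; rf_MapAccess zeroes each element as
* it takes it off the list.
*
* rf_FreePDAList and rf_FreeASMList are static. They should never be called directly,
* because rf_FreeAccessStripeMap takes care of freeing the PhysDiskAddr and ASM lists.
*
****************************************************************************************/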
static RF_FreeList_t *rf_asmhdr_freelist;
#define RF_MAX_FREE_ASMHDR 128
#define RF_ASMHDR_INC 16
#define RF_ASMHDR_INITIAL 32
static RF_FreeList_t *rf_asm_freelist;
#define RF_MAX_FREE_ASM 192
#define RF_ASM_INC 24
#define RF_ASM_INITIAL 64
static RF_FreeList_t *rf_pda_freelist;
#define RF_MAX_FREE_PDA 192
#define RF_PDA_INC 24
#define RF_PDA_INITIAL 64
/*
 * Shutdown hook for the map module (registered by rf_ConfigureMapModule).
 * So far, all that is necessary is to release the three free lists.
 * The argument is unused.
 */
static void rf_ShutdownMapModule(void *);

static void
rf_ShutdownMapModule(void *ignored)
{
	RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
	RF_FREELIST_DESTROY(rf_pda_freelist,next,(RF_PhysDiskAddr_t *));
	RF_FREELIST_DESTROY(rf_asm_freelist,next,(RF_AccessStripeMap_t *));
}
/*
 * Set up the map module: create the ASM-header, ASM and PDA free lists,
 * register rf_ShutdownMapModule on the caller's shutdown list, and prime each
 * free list with its initial population.
 *
 * Returns 0 on success, ENOMEM if any free list cannot be created (any lists
 * created before the failing one are destroyed again), or the non-zero return
 * code of rf_ShutdownCreate if the shutdown hook cannot be registered (in
 * which case all three lists are destroyed).
 */
int
rf_ConfigureMapModule(RF_ShutdownList_t **listp)
{
	int rc;

	RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
	    RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
	if (rf_asmhdr_freelist == NULL) {
		return(ENOMEM);
	}

	RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
	    RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
	if (rf_asm_freelist == NULL) {
		RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
		return(ENOMEM);
	}

	RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA,
	    RF_PDA_INC, sizeof(RF_PhysDiskAddr_t));
	if (rf_pda_freelist == NULL) {
		/*
		 * Undo the two lists that do exist.  (This path previously tried
		 * to destroy the NULL PDA list and left the ASM list allocated.)
		 */
		RF_FREELIST_DESTROY(rf_asmhdr_freelist,next,(RF_AccessStripeMapHeader_t *));
		RF_FREELIST_DESTROY(rf_asm_freelist,next,(RF_AccessStripeMap_t *));
		return(ENOMEM);
	}

	rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
	if (rc) {
		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
		    __LINE__, rc);
		/* the hook was not registered, so run it by hand to release the lists */
		rf_ShutdownMapModule(NULL);
		return(rc);
	}

	RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL,next,
	    (RF_AccessStripeMapHeader_t *));
	RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL,next,
	    (RF_AccessStripeMap_t *));
	RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL,next,
	    (RF_PhysDiskAddr_t *));
	return(0);
}
/* Take one ASM header from its free list and hand it back zero-filled. */
RF_AccessStripeMapHeader_t *
rf_AllocAccessStripeMapHeader(void)
{
	RF_AccessStripeMapHeader_t *hdr;

	RF_FREELIST_GET(rf_asmhdr_freelist,hdr,next,(RF_AccessStripeMapHeader_t *));
	bzero((char *)hdr, sizeof(RF_AccessStripeMapHeader_t));
	return(hdr);
}
/* Give a single ASM header back to its free list. */
void
rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p)
{
	RF_FREELIST_FREE(rf_asmhdr_freelist,p,next);
}
/* Take one PDA from its free list and hand it back zero-filled. */
RF_PhysDiskAddr_t *
rf_AllocPhysDiskAddr(void)
{
	RF_PhysDiskAddr_t *pda;

	RF_FREELIST_GET(rf_pda_freelist,pda,next,(RF_PhysDiskAddr_t *));
	bzero((char *)pda, sizeof(RF_PhysDiskAddr_t));
	return(pda);
}
/*
 * Get a linked list of `count' PDAs from the PDA free list in a single
 * RF_FREELIST_GET_N operation, so the free list is locked only once.
 * The elements are not zeroed by this routine.
 *
 * When we have to call calloc, we do it one component at a time to simplify
 * the process of freeing the list at program shutdown.  This should not be
 * much of a performance hit, because it should be very infrequently executed.
 */
RF_PhysDiskAddr_t *
rf_AllocPDAList(int count)
{
	RF_PhysDiskAddr_t *head = NULL;

	RF_FREELIST_GET_N(rf_pda_freelist,head,next,(RF_PhysDiskAddr_t *),count);
	return(head);
}
/* Give a single PDA back to its free list. */
void
rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p)
{
	RF_FREELIST_FREE(rf_pda_freelist,p,next);
}
/*
 * Give a whole linked list of PDAs back to the PDA free list in one
 * RF_FREELIST_FREE_N operation.  Only l_start and count are passed on to
 * the macro; l_end is accepted but not used here.
 */
static void
rf_FreePDAList(
	RF_PhysDiskAddr_t *l_start,	/* pointer to start of list */
	RF_PhysDiskAddr_t *l_end,	/* pointer to end of list */
	int count)			/* number of elements in list */
{
	RF_FREELIST_FREE_N(rf_pda_freelist,l_start,next,(RF_PhysDiskAddr_t *),count);
}
/* Take one ASM from its free list and hand it back zero-filled. */
RF_AccessStripeMap_t *
rf_AllocAccessStripeMapComponent(void)
{
	RF_AccessStripeMap_t *stripe;

	RF_FREELIST_GET(rf_asm_freelist,stripe,next,(RF_AccessStripeMap_t *));
	bzero((char *)stripe, sizeof(RF_AccessStripeMap_t));
	return(stripe);
}
/*
 * Get a linked list of `count' ASMs from the ASM free list in a single
 * RF_FREELIST_GET_N operation.  The elements are not zeroed by this routine.
 *
 * This is essentially identical to AllocPDAList.  I should combine the two.
 * When we have to call calloc, we do it one component at a time to simplify
 * the process of freeing the list at program shutdown.  This should not be
 * much of a performance hit, because it should be very infrequently executed.
 */
RF_AccessStripeMap_t *
rf_AllocASMList(int count)
{
	RF_AccessStripeMap_t *head = NULL;

	RF_FREELIST_GET_N(rf_asm_freelist,head,next,(RF_AccessStripeMap_t *),count);
	return(head);
}
/* Give a single ASM back to its free list. */
void
rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p)
{
	RF_FREELIST_FREE(rf_asm_freelist,p,next);
}
/*
 * Give a whole linked list of ASMs back to the ASM free list in one
 * RF_FREELIST_FREE_N operation.  Only l_start and count are passed on to
 * the macro; l_end is accepted but not used here.
 */
static void
rf_FreeASMList(
	RF_AccessStripeMap_t *l_start,	/* pointer to start of list */
	RF_AccessStripeMap_t *l_end,	/* pointer to end of list */
	int count)			/* number of elements in list */
{
	RF_FREELIST_FREE_N(rf_asm_freelist,l_start,next,(RF_AccessStripeMap_t *),count);
}
/*
 * Splice `chain' onto the end of the accumulating PDA list described by
 * *headp / *tailp, add its length to *countp, and advance *tailp to the last
 * element of `chain' (if it has any).
 */
static void
rf_AppendPDAChain(
	RF_PhysDiskAddr_t **headp,
	RF_PhysDiskAddr_t **tailp,
	RF_PhysDiskAddr_t *chain,
	int *countp)
{
	RF_PhysDiskAddr_t *cur, *last = NULL;

	if (*headp == NULL)
		*headp = chain;
	else
		(*tailp)->next = chain;

	for (cur = chain; cur != NULL; cur = cur->next) {
		last = cur;
		(*countp)++;
	}
	if (last != NULL)
		*tailp = last;
}

/*
 * Free an entire access stripe map: every PDA hanging off every stripe
 * (Q, parity and data lists), every ASM on the stripe list, and finally the
 * header itself.  The PDA lists of all stripes are first linked into a single
 * list so that they can be returned to the free list with one call.
 */
void
rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr)
{
	RF_AccessStripeMap_t *stripe, *lastStripe = NULL;
	RF_PhysDiskAddr_t *walk, *allPDAs = NULL, *allPDAsTail = NULL;
	int pdaCount = 0, verify, stripeCount = 0;

	for (stripe = hdr->stripeMap; stripe != NULL; stripe = stripe->next) {
		/* link the 3 pda lists into the accumulating pda list */
		rf_AppendPDAChain(&allPDAs, &allPDAsTail, stripe->qInfo, &pdaCount);
		rf_AppendPDAChain(&allPDAs, &allPDAsTail, stripe->parityInfo, &pdaCount);
		rf_AppendPDAChain(&allPDAs, &allPDAsTail, stripe->physInfo, &pdaCount);
		lastStripe = stripe;
		stripeCount++;
	}

	/* debug only: the spliced list must be exactly as long as we counted */
	verify = 0;
	for (walk = allPDAs; walk != NULL; walk = walk->next)
		verify++;
	RF_ASSERT(verify == pdaCount);

	if (allPDAs != NULL)
		rf_FreePDAList(allPDAs, allPDAsTail, pdaCount);
	rf_FreeASMList(hdr->stripeMap, lastStripe, stripeCount);
	rf_FreeAccessStripeMapHeader(hdr);
}
/*
 * Decide whether the stripe described by asmap contains a failure that rules
 * out the large write optimization.  Returns 1 if it does, 0 if it does not.
 *
 * We can't use the large write optimization if there are any failures in the
 * stripe.  In the declustered layout, there is no way to immediately determine
 * what disks constitute a stripe, so we actually have to hunt through the
 * stripe looking for failures.  The reason we map the parity instead of just
 * using asm->parityInfo->col is because the latter may have been already
 * redirected to a spare drive, which would mess up the computation of the
 * stripe offset.
 *
 * The disk in the parity column is skipped.  When a dead disk is found among
 * the others, the answer is 1 unless the row is being reconstructed and the
 * corresponding reconstruction unit has already been rebuilt; in that case
 * RF_ASM_REDIR_LARGE_WRITE is set in asmap->flags and 0 is returned.
 *
 * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
 */
int
rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
{
	RF_RowCol_t mappedRow, mappedCol, prow, pcol, *diskids, row, d;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_StripeCount_t stripeOffset;
	int numFailures;
	RF_RaidAddr_t sosAddr;
	RF_SectorNum_t diskOffset, poffset;
	RF_RowCol_t stripeRow;

	/* quick out in the fault-free case. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	numFailures = raidPtr->numFailures;
	RF_UNLOCK_MUTEX(raidPtr->mutex);
	if (numFailures == 0)
		return(0);

	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
	row = asmap->physInfo->row;
	(layoutPtr->map->IdentifyStripe)(raidPtr, asmap->raidAddress, &diskids, &stripeRow);
	(layoutPtr->map->MapParity)(raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0);	/* get pcol */

	/* this need not be true if we've redirected the access to a spare in another row
	RF_ASSERT(row == testrow);
	*/

	stripeOffset = 0;
	for (d = 0; d < layoutPtr->numDataCol+layoutPtr->numParityCol; d++) {
		if (diskids[d] == pcol)
			continue;	/* the parity column does not count as a stripe offset */

		if (!RF_DEAD_DISK(raidPtr->Disks[stripeRow][diskids[d]].status)) {
			stripeOffset++;
			continue;
		}

		/* found the dead disk: only acceptable if its unit is already rebuilt */
		if (raidPtr->status[stripeRow] != rf_rs_reconstructing)
			return(1);
		RF_ASSERT(raidPtr->reconControl[stripeRow]->fcol == diskids[d]);
		layoutPtr->map->MapSector(raidPtr,
		    sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit,
		    &mappedRow, &mappedCol, &diskOffset, 0);
		RF_ASSERT( (mappedRow == stripeRow) && (mappedCol == diskids[d]) );
		if (!rf_CheckRUReconstructed(raidPtr->reconControl[stripeRow]->reconMap, diskOffset))
			return(1);
		asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
		return(0);
	}
	return(0);
}
/*
 * Return the number of failed data units in the stripe containing
 * asmap->raidAddress: each of the layout's numDataCol stripe units is mapped
 * (without remapping) and counted if it lands on a dead disk.
 */
int
rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_RowCol_t unitRow, unitCol, row, su;
	RF_SectorNum_t diskOffset;
	RF_RaidAddr_t sosAddr;
	int numFailures;

	/* quick out in the fault-free case. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	numFailures = raidPtr->numFailures;
	RF_UNLOCK_MUTEX(raidPtr->mutex);
	if (numFailures == 0)
		return(0);

	/* from here on numFailures counts dead data units in this stripe */
	numFailures = 0;
	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
	row = asmap->physInfo->row;
	su = 0;
	while (su < layoutPtr->numDataCol) {
		(layoutPtr->map->MapSector)(raidPtr, sosAddr + su * layoutPtr->sectorsPerStripeUnit,
		    &unitRow, &unitCol, &diskOffset, 0);
		if (RF_DEAD_DISK(raidPtr->Disks[unitRow][unitCol].status))
			numFailures++;
		su++;
	}
	return numFailures;
}
/*****************************************************************************************
 *
 * debug routines
 *
 ****************************************************************************************/

/* Print an access stripe map without the buffer pointers. */
void
rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h)
{
	rf_PrintFullAccessStripeMap(asm_h, 0);
}
/*
 * Print every stripe of an access stripe map: the stripe ID, sector count and
 * data/parity failure counts, the parity PDA(s) if any, each data PDA, and a
 * note about the stripe's failures.
 *
 * asm_h  header of the map to print
 * prbuf  non-zero => also print each PDA's buffer pointer
 *
 * The failure note is taken from the stripe currently being printed.  (It used
 * to be read from asm_h->stripeMap, i.e. always the first stripe, so every
 * stripe repeated the first stripe's failure state.)  The "Failed PDA" line is
 * skipped when failedPDAs[0] is NULL, which can be the case for an ASM produced
 * by rf_DuplicateASM.
 */
void
rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf)
{
	int i;
	RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
	RF_PhysDiskAddr_t *p;

	printf("%d stripes total\n", (int)asm_h->numStripes);
	for (; asmap; asmap = asmap->next) {
		printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
		    (int) asmap->stripeID,
		    (int) asmap->totalSectorsAccessed,
		    (int) asmap->numDataFailed,
		    (int) asmap->numParityFailed);

		if (asmap->parityInfo) {
			printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col,
			    (int)asmap->parityInfo->startSector,
			    (int)(asmap->parityInfo->startSector +
			    asmap->parityInfo->numSector - 1));
			if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->bufPtr);
			/* a parity descriptor may have been split in two by rf_ASMParityAdjust */
			if (asmap->parityInfo->next) {
				printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row,
				    asmap->parityInfo->next->col,
				    (int) asmap->parityInfo->next->startSector,
				    (int)(asmap->parityInfo->next->startSector +
				    asmap->parityInfo->next->numSector - 1));
				if (prbuf) printf(" b0x%lx",(unsigned long) asmap->parityInfo->next->bufPtr);
				RF_ASSERT(asmap->parityInfo->next->next == NULL);
			}
			printf("]\n\t");
		}

		for (i=0,p=asmap->physInfo; p; p=p->next,i++) {
			printf("SU r%d c%d s%d-%d ", p->row, p->col, (int)p->startSector,
			    (int)(p->startSector + p->numSector - 1));
			if (prbuf) printf("b0x%lx ", (unsigned long) p->bufPtr);
			/* break the line after every second data unit (except the first) */
			if (i && !(i&1)) printf("\n\t");
		}
		printf("\n");

		/* report this stripe's failures, not the first stripe's */
		p = asmap->failedPDAs[0];
		if (asmap->numDataFailed + asmap->numParityFailed > 1)
			printf("[multiple failures]\n");
		else if (asmap->numDataFailed + asmap->numParityFailed > 0 && p != NULL)
			printf("\t[Failed PDA: r%d c%d s%d-%d]\n",p->row, p->col,
			    (int)p->startSector, (int)(p->startSector + p->numSector-1));
	}
}
/*
 * Debug helper: print the RAID addresses of the stripe unit boundaries from
 * the start of the stripe containing raidAddr up to the end of the access,
 * followed by raidAddr's offset within its stripe unit.
 */
void
rf_PrintRaidAddressInfo(
	RF_Raid_t *raidPtr,
	RF_RaidAddr_t raidAddr,
	RF_SectorCount_t numBlocks)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_RaidAddr_t boundary;
	int suOffset;

	printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t");
	boundary = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
	while (boundary <= raidAddr + numBlocks) {
		printf("%d (0x%x), ",(int)boundary, (int)boundary);
		boundary += layoutPtr->sectorsPerStripeUnit;
	}
	printf("\n");

	suOffset = (int)(raidAddr % layoutPtr->sectorsPerStripeUnit);
	printf("Offset into stripe unit: %d (0x%x)\n", suOffset, suOffset);
}
/*
 * Given a parity descriptor and the starting address within a stripe,
 * range restrict the parity descriptor to touch only the correct stuff.
 *
 * toAdjust               parity (or Q) PDA, initially covering the whole parity unit
 * startAddrWithinStripe  RAID address where the access begins within this stripe
 * endAddress             RAID address just past the end of the access
 * layoutPtr              layout, used for the stripe unit size and boundaries
 * asm_p                  the stripe's ASM; its data mapping must already be filled in
 *
 * When we're accessing only a portion of one stripe unit, we want the parity
 * descriptor to identify only the chunk of parity associated with the data.
 * When the access spans exactly one stripe unit boundary and is less than a
 * stripe unit in size, it uses two disjoint regions of the parity unit, and a
 * second PDA is allocated and chained after toAdjust to describe the second
 * region.  When an access spans more than one stripe unit boundary, it uses
 * all of the parity unit and the descriptor is left alone.
 *
 * To better handle the case where stripe units are small, we may eventually
 * want to change the 2nd case so that if the SU size is below some threshold,
 * we just read/write the whole thing instead of breaking it up into two
 * accesses.
 */
void rf_ASMParityAdjust(
	RF_PhysDiskAddr_t *toAdjust,
	RF_StripeNum_t startAddrWithinStripe,
	RF_SectorNum_t endAddress,
	RF_RaidLayout_t *layoutPtr,
	RF_AccessStripeMap_t *asm_p)
{
	RF_PhysDiskAddr_t *tailPda;
	int suOffset;

	/* case 1: the access stays inside a single stripe unit */
	if (asm_p->numStripeUnitsAccessed == 1) {
		suOffset = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);
		toAdjust->startSector += suOffset;
		toAdjust->raidAddress += suOffset;
		toAdjust->numSector = asm_p->physInfo->numSector;
		RF_ASSERT(toAdjust->numSector != 0);
		return;
	}

	/* case 3: anything other than "two units, less than one SU in total" keeps the whole parity unit */
	if (!(asm_p->numStripeUnitsAccessed == 2 &&
	    asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit))
		return;

	/* case 2: two disjoint pieces of the parity unit */
	suOffset = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit);

	/* create a second pda and copy the parity map info into it */
	RF_ASSERT(toAdjust->next == NULL);
	tailPda = rf_AllocPhysDiskAddr();
	toAdjust->next = tailPda;
	*tailPda = *toAdjust;	/* structure assignment */
	tailPda->next = NULL;

	/* adjust the start sector & number of blocks for the first parity pda */
	toAdjust->startSector += suOffset;
	toAdjust->raidAddress += suOffset;
	toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe;
	RF_ASSERT(toAdjust->numSector != 0);

	/* adjust the second pda */
	tailPda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress);
	/*new_pda->raidAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, toAdjust->raidAddress);*/
	RF_ASSERT(tailPda->numSector != 0);
}
/*
 * Check whether the disk a PDA maps to has been spared or has failed.
 *
 *  - spared to a dedicated spare: redirect the PDA to the spare's row/col.
 *  - spared to distributed spare space: remap the PDA with RF_REMAP, using
 *    MapSector for data PDAs and MapParity for every other PDA type.
 *  - otherwise dead: count the failure in asm_p (parity or data, chosen by
 *    the `parity' argument) and append the PDA to asm_p->failedPDAs.
 *
 * The last argument says whether the PDA is parity (non-zero) or data (0).
 *
 * NOTE(review): a PDA of type RF_PDA_TYPE_Q also takes the MapParity path in
 * the distributed-sparing case -- confirm that is what is wanted.
 */
void rf_ASMCheckStatus(
	RF_Raid_t *raidPtr,
	RF_PhysDiskAddr_t *pda_p,
	RF_AccessStripeMap_t *asm_p,
	RF_RaidDisk_t **disks,
	int parity)
{
	RF_DiskStatus_t diskState;
	RF_RowCol_t sparedRow, sparedCol;

	diskState = disks[pda_p->row][pda_p->col].status;

	if (diskState == rf_ds_spared) {
		/* if the disk has been spared, redirect access to the spare */
		sparedRow = pda_p->row;
		sparedCol = pda_p->col;
		pda_p->row = disks[sparedRow][sparedCol].spareRow;
		pda_p->col = disks[sparedRow][sparedCol].spareCol;
	}
	else if (diskState == rf_ds_dist_spared) {
		/* ditto if disk has been spared to dist spare space */
		RF_RowCol_t prevRow = pda_p->row;
		RF_RowCol_t prevCol = pda_p->col;
		RF_SectorNum_t prevSector = pda_p->startSector;

		if (pda_p->type == RF_PDA_TYPE_DATA)
			raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);
		else
			raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP);

		if (rf_mapDebug) {
			printf("Redirected r %d c %d o %d -> r%d c %d o %d\n",prevRow,prevCol,(int)prevSector,
			    pda_p->row,pda_p->col,(int)pda_p->startSector);
		}
	}
	else if (RF_DEAD_DISK(diskState)) {
		/* if the disk is inaccessible, mark the failure */
		if (parity)
			asm_p->numParityFailed++;
		else {
			asm_p->numDataFailed++;
#if 0
			/* XXX Do we really want this spewing out on the console? GO */
			printf("DATA_FAILED!\n");
#endif
		}
		asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
		asm_p->numFailedPDAs++;
#if 0
		switch (asm_p->numParityFailed + asm_p->numDataFailed)
		{
		case 1:
			asm_p->failedPDAs[0] = pda_p;
			break;
		case 2:
			asm_p->failedPDAs[1] = pda_p;
		default:
			break;
		}
#endif
	}

	/* the redirected access should never span a stripe unit boundary */
	RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress) ==
	    rf_RaidAddressToStripeUnitID(&raidPtr->Layout,pda_p->raidAddress + pda_p->numSector -1));
	RF_ASSERT(pda_p->col != -1);
}

133
sys/dev/raidframe/rf_map.h Normal file
View File

@ -0,0 +1,133 @@
/* $NetBSD: rf_map.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_map.h */
/* :
* Log: rf_map.h,v
* Revision 1.9 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.8 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.7 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.6 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.5 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.4 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1995/12/01 19:25:14 root
* added copyright info
*
*/
#ifndef _RF__RF_MAP_H_
#define _RF__RF_MAP_H_
#include "rf_types.h"
#include "rf_alloclist.h"
#include "rf_raid.h"
/* mapping structure allocation and free routines */

/* map a RAID-address-space access to a list of per-stripe ASMs; NULL if the
 * access extends past the end of the array */
RF_AccessStripeMapHeader_t *rf_MapAccess(RF_Raid_t *raidPtr,
RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
caddr_t buffer, int remap);
/* recompute the failure counts and failedPDAs of every stripe in the list */
void rf_MarkFailuresInASMList(RF_Raid_t *raidPtr,
RF_AccessStripeMapHeader_t *asm_h);
/* copy an ASM together with its physInfo and parityInfo PDA lists */
RF_AccessStripeMap_t *rf_DuplicateASM(RF_AccessStripeMap_t *asmap);
/* bytewise copy of a single PDA */
RF_PhysDiskAddr_t *rf_DuplicatePDA(RF_PhysDiskAddr_t *pda);
/* create and prime the free lists, register the shutdown hook; 0 on success */
int rf_ConfigureMapModule(RF_ShutdownList_t **listp);
/* single-element allocators return a zeroed structure; the list allocators
 * return `count' linked elements without zeroing them */
RF_AccessStripeMapHeader_t *rf_AllocAccessStripeMapHeader(void);
void rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p);
RF_PhysDiskAddr_t *rf_AllocPhysDiskAddr(void);
RF_PhysDiskAddr_t *rf_AllocPDAList(int count);
void rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p);
RF_AccessStripeMap_t *rf_AllocAccessStripeMapComponent(void);
RF_AccessStripeMap_t *rf_AllocASMList(int count);
void rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p);
/* free the header, all of its ASMs, and all of their PDA lists */
void rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr);
/* 1 if a failure in the stripe rules out the large write optimization, else 0 */
int rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
/* number of data units of the stripe that map to a dead disk */
int rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap);
/* debug printing; prbuf != 0 also prints buffer pointers */
void rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h);
void rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h, int prbuf);
void rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
RF_SectorCount_t numBlocks);
/* restrict a parity PDA to the part of the parity unit the access needs */
void rf_ASMParityAdjust(RF_PhysDiskAddr_t *toAdjust,
RF_StripeNum_t startAddrWithinStripe, RF_SectorNum_t endAddress,
RF_RaidLayout_t *layoutPtr, RF_AccessStripeMap_t *asm_p);
/* redirect a PDA whose disk has been spared, or record it as failed in asm_p */
void rf_ASMCheckStatus(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda_p,
RF_AccessStripeMap_t *asm_p, RF_RaidDisk_t **disks, int parity);
#endif /* !_RF__RF_MAP_H_ */

View File

@ -0,0 +1,197 @@
/* $NetBSD: rf_mcpair.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_mcpair.c
* an mcpair is a structure containing a mutex and a condition variable.
* it's used to block the current thread until some event occurs.
*/
/* :
* Log: rf_mcpair.c,v
* Revision 1.16 1996/06/19 22:23:01 jimz
* parity verification is now a layout-configurable thing
* not all layouts currently support it (correctly, anyway)
*
* Revision 1.15 1996/06/17 03:18:04 jimz
* include shutdown.h for macroized ShutdownCreate
*
* Revision 1.14 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.13 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.12 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.11 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.10 1996/05/20 16:15:22 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.9 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.8 1996/05/16 16:04:42 jimz
* convert to return-val on FREELIST init
*
* Revision 1.7 1996/05/16 14:47:21 jimz
* rewrote to use RF_FREELIST
*
* Revision 1.6 1995/12/01 19:25:43 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_mcpair.h"
#include "rf_debugMem.h"
#include "rf_freelist.h"
#include "rf_shutdown.h"
#if defined(__NetBSD__) && defined(_KERNEL)
#include <sys/proc.h>
#endif
/* Freelist that rf_AllocMCPair() draws from and rf_FreeMCPair() returns to. */
static RF_FreeList_t *rf_mcpair_freelist;
/*
 * Freelist tuning values.  RF_MAX_FREE_MCPAIR and RF_MCPAIR_INC are passed
 * to RF_FREELIST_CREATE and RF_MCPAIR_INITIAL to RF_FREELIST_PRIME_INIT in
 * rf_ConfigureMCPair().  Presumably: maximum number of idle entries kept,
 * growth increment, and number of entries pre-allocated -- confirm against
 * rf_freelist.h.
 */
#define RF_MAX_FREE_MCPAIR 128
#define RF_MCPAIR_INC 16
#define RF_MCPAIR_INITIAL 24
/* Forward declarations of the file-local helpers. */
static int init_mcpair(RF_MCPair_t *);
static void clean_mcpair(RF_MCPair_t *);
static void rf_ShutdownMCPair(void *);
/*
 * Initialize the mutex and the condition variable embedded in an mcpair.
 *
 * Returns 0 on success, otherwise the nonzero code reported by the
 * primitive that failed (an error message is logged).  If the condition
 * variable cannot be initialized, the already-initialized mutex is
 * destroyed again before returning.
 */
static int init_mcpair(RF_MCPair_t *t)
{
  int status;

  status = rf_mutex_init(&t->mutex);
  if (status != 0) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, status);
    return(status);
  }

  status = rf_cond_init(&t->cond);
  if (status == 0) {
    return(0);
  }

  /* cond init failed: report it and undo the mutex init */
  RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
    __LINE__, status);
  rf_mutex_destroy(&t->mutex);
  return(status);
}
/*
 * Counterpart of init_mcpair(): destroy the mutex, then the condition
 * variable, of the given mcpair.
 */
static void clean_mcpair(RF_MCPair_t *t)
{
  rf_mutex_destroy(&t->mutex);
  rf_cond_destroy(&t->cond);
}
/*
 * Shutdown-list callback registered by rf_ConfigureMCPair().  Destroys the
 * mcpair freelist, handing clean_mcpair to the freelist macro as the
 * per-entry cleanup routine.  The argument is not used.
 */
static void rf_ShutdownMCPair(void *ignored)
{
  RF_FREELIST_DESTROY_CLEAN(rf_mcpair_freelist, next, (RF_MCPair_t *),
    clean_mcpair);
}
/*
 * Set up the mcpair module: create the freelist, register
 * rf_ShutdownMCPair() on the caller's shutdown list, then prime the
 * freelist with RF_MCPAIR_INITIAL entries initialized by init_mcpair().
 *
 * listp - shutdown list the teardown callback is added to.
 *
 * Returns 0 on success.  If the shutdown callback cannot be registered the
 * error is logged, the freelist is torn down immediately and the nonzero
 * code from rf_ShutdownCreate() is returned.
 */
int rf_ConfigureMCPair(RF_ShutdownList_t **listp)
{
  int status;

  RF_FREELIST_CREATE(rf_mcpair_freelist, RF_MAX_FREE_MCPAIR,
    RF_MCPAIR_INC, sizeof(RF_MCPair_t));

  status = rf_ShutdownCreate(listp, rf_ShutdownMCPair, NULL);
  if (status == 0) {
    RF_FREELIST_PRIME_INIT(rf_mcpair_freelist, RF_MCPAIR_INITIAL, next,
      (RF_MCPair_t *), init_mcpair);
    return(0);
  }

  /* nobody will call the shutdown hook for us, so run it by hand */
  RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
    __FILE__, __LINE__, status);
  rf_ShutdownMCPair(NULL);
  return(status);
}
/*
 * Obtain an mcpair from the freelist (init_mcpair is supplied to the
 * freelist macro as the initializer).  On success the flag is cleared and
 * the link field is reset before the pair is handed out.
 *
 * Returns the mcpair, or NULL if the freelist macro produced none.
 */
RF_MCPair_t *rf_AllocMCPair(void)
{
  RF_MCPair_t *mcp;

  RF_FREELIST_GET_INIT(rf_mcpair_freelist, mcp, next, (RF_MCPair_t *),
    init_mcpair);
  if (mcp == NULL) {
    return(mcp);
  }
  mcp->flag = 0;
  mcp->next = NULL;
  return(mcp);
}
/*
 * Give an mcpair back to the freelist; clean_mcpair is supplied to the
 * freelist macro as the cleanup routine.
 */
void rf_FreeMCPair(RF_MCPair_t *t)
{
  RF_FREELIST_FREE_CLEAN(rf_mcpair_freelist, t, next, clean_mcpair);
}
/*
 * The callback function used to wake you up when you use an mcpair to wait
 * for something (see RF_WAIT_MCPAIR in rf_mcpair.h).
 *
 * With the mcpair mutex held it sets mcpair->flag to 1 and then notifies
 * the waiter:
 *   - builds with KERNEL defined call wakeup() on the address of the flag,
 *     which is the address the kernel variants of RF_WAIT_MCPAIR sleep on;
 *     NetBSD kernel builds additionally call wakeup() on the address of the
 *     condition variable;
 *   - all other builds signal the condition variable.
 *
 * NOTE(review): the outer test is on KERNEL while the nested NetBSD test is
 * on _KERNEL; the nested wakeup is only compiled when both are defined --
 * confirm that is the case for the NetBSD kernel build.
 */
void rf_MCPairWakeupFunc(mcpair)
RF_MCPair_t *mcpair;
{
RF_LOCK_MUTEX(mcpair->mutex);
mcpair->flag = 1;
#if 0
printf("MCPairWakeupFunc called!\n");
#endif
#ifdef KERNEL
wakeup(&(mcpair->flag)); /* XXX Does this do anything useful!! GO */
/* XXX Looks like the following is needed to truly get the
functionality they were looking for here... This could be a side-effect
of my using a tsleep in the NetBSD port though... XXX */
#if defined(__NetBSD__) && defined(_KERNEL)
wakeup(&(mcpair->cond)); /* XXX XXX XXX GO */
#endif
#else /* KERNEL */
RF_SIGNAL_COND(mcpair->cond);
#endif /* KERNEL */
RF_UNLOCK_MUTEX(mcpair->mutex);
}

View File

@ -0,0 +1,61 @@
/* $NetBSD: rf_mcpair.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* rf_mcpair.h
* see comments in rf_mcpair.c
*/
#ifndef _RF__RF_MCPAIR_H_
#define _RF__RF_MCPAIR_H_
#include "rf_types.h"
#include "rf_threadstuff.h"
/*
 * A mutex/condition-variable pair plus a flag, used to block a thread until
 * some event occurs (see rf_mcpair.c).
 */
struct RF_MCPair_s {
RF_DECLARE_MUTEX(mutex) /* held by rf_MCPairWakeupFunc() while it sets flag */
RF_DECLARE_COND(cond) /* signalled (or used as a wakeup channel) by rf_MCPairWakeupFunc() */
int flag; /* cleared by rf_AllocMCPair(), set to 1 by rf_MCPairWakeupFunc() */
RF_MCPair_t *next; /* link field named in the RF_FREELIST_* macro calls */
};
/*
 * RF_WAIT_MCPAIR(_mcp): block the caller on the given mcpair.
 *   - KERNEL, not NetBSD: mpsleep() on the address of the flag, passing the
 *     address of the mcpair mutex together with MS_LOCK_SIMPLE.
 *   - KERNEL, NetBSD: tsleep() on the address of the flag at
 *     PRIBIO | PCATCH with no timeout; no mutex is handed to the sleep call.
 *   - otherwise: RF_WAIT_COND() on the pair's condition variable and mutex.
 * NOTE(review): callers presumably hold the mcpair mutex and re-test
 * (_mcp)->flag after waking -- confirm against the call sites.
 */
#ifdef KERNEL
#ifndef __NetBSD__
#define RF_WAIT_MCPAIR(_mcp) mpsleep(&((_mcp)->flag), PZERO, "mcpair", 0, (void *) simple_lock_addr((_mcp)->mutex), MS_LOCK_SIMPLE)
#else
#define RF_WAIT_MCPAIR(_mcp) tsleep(&((_mcp)->flag), PRIBIO | PCATCH, "mcpair", 0)
#endif
#else /* KERNEL */
#define RF_WAIT_MCPAIR(_mcp) RF_WAIT_COND((_mcp)->cond, (_mcp)->mutex)
#endif /* KERNEL */
/* Create the mcpair freelist and register its teardown on *listp; returns 0 or a nonzero error code. */
int rf_ConfigureMCPair(RF_ShutdownList_t **listp);
/* Get an mcpair with its flag cleared; may return NULL. */
RF_MCPair_t *rf_AllocMCPair(void);
/* Return an mcpair obtained from rf_AllocMCPair() to the freelist. */
void rf_FreeMCPair(RF_MCPair_t *t);
/* Set the mcpair's flag and wake whoever is waiting on it. */
void rf_MCPairWakeupFunc(RF_MCPair_t *t);
#endif /* !_RF__RF_MCPAIR_H_ */

View File

@ -0,0 +1,255 @@
/* $NetBSD: rf_memchunk.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*********************************************************************************
* rf_memchunk.c
*
* experimental code. I've found that the malloc and free calls in the DAG
* creation code are very expensive. Since for any given workload the DAGs
* created for different accesses are likely to be similar to each other, the
* amount of memory used for any given DAG data structure is likely to be one
* of a small number of values. For example, in UNIX, all reads and writes will
* be less than 8k and will not span stripe unit boundaries. Thus in the absence
* of failure, the only DAGs that will ever get created are single-node reads
* and single-stripe-unit atomic read-modify-writes. So, I'm very likely to
* be continually asking for chunks of memory equal to the sizes of these two
* DAGs.
*
* This leads to the idea of holding on to these chunks of memory when the DAG is
* freed and then, when a new DAG is created, trying to find such a chunk before
* calling malloc.
*
* the "chunk list" is a list of lists. Each header node contains a size value
* and a pointer to a list of chunk descriptors, each of which holds a pointer
* to a chunk of memory of the indicated size.
*
* There is currently no way to purge memory out of the chunk list. My
* initial thought on this is to have a low-priority thread that wakes up every
* 1 or 2 seconds, purges all the chunks with low reuse counts, and sets all
* the reuse counts to zero.
*
* This whole idea may be bad, since malloc may be able to do this more efficiently.
* It's worth a try, though, and it can be turned off by setting useMemChunks to 0.
*
********************************************************************************/
/* :
* Log: rf_memchunk.c,v
* Revision 1.17 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.16 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.15 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.14 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.13 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.12 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.11 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.10 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.9 1996/05/20 16:15:45 jimz
* switch to rf_{mutex,cond}_{init,destroy}
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1995/12/01 19:26:07 root
* added copyright info
*
*/
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_debugMem.h"
#include "rf_memchunk.h"
#include "rf_general.h"
#include "rf_options.h"
#include "rf_shutdown.h"
#include "rf_sys.h"
typedef struct RF_ChunkHdr_s RF_ChunkHdr_t;
/*
 * Header node of the chunk list: one node per distinct chunk size.
 * rf_ReleaseMemChunk() inserts nodes so that the list stays ordered by
 * ascending size.
 */
struct RF_ChunkHdr_s {
int size; /* size of every chunk hanging off this header */
RF_ChunkDesc_t *list; /* idle chunk descriptors of that size (may be NULL) */
RF_ChunkHdr_t *next; /* next header (larger size) */
};
/*
 * chunklist is the head of the list of headers.  chunk_hdr_free_list is
 * only ever reset to NULL in this file; chunk_desc_free_list is consulted
 * by NewMemChunk() but nothing in this file adds entries to it.
 */
static RF_ChunkHdr_t *chunklist, *chunk_hdr_free_list;
static RF_ChunkDesc_t *chunk_desc_free_list;
/* Mutex taken by rf_GetMemChunk() and rf_ReleaseMemChunk() around chunk list accesses. */
RF_DECLARE_STATIC_MUTEX(chunkmutex)
/* Forward declarations of the file-local helpers. */
static void rf_ShutdownMemChunk(void *);
static RF_ChunkDesc_t *NewMemChunk(int, char *);
/*
 * Shutdown-list callback for the memchunk module.
 *
 * Walks every header on the chunk list and frees each cached chunk buffer,
 * its descriptor and finally the header itself, then destroys the module
 * mutex.  When rf_memChunkDebug is set, the size and reuse count of every
 * cached chunk is printed on the way.  The argument is not used.
 *
 * chunklist is reset to NULL once everything has been released so that no
 * pointer to freed memory is left behind in the file-scope state.
 */
static void rf_ShutdownMemChunk(void *ignored)
{
  RF_ChunkHdr_t *hdr, *next_hdr;
  RF_ChunkDesc_t *desc, *next_desc;

  if (rf_memChunkDebug) {
    printf("Chunklist:\n");
  }
  for (hdr = chunklist; hdr; hdr = next_hdr) {
    for (desc = hdr->list; desc; desc = next_desc) {
      if (rf_memChunkDebug) {
        printf("Size %d reuse count %d\n", desc->size, desc->reuse_count);
      }
      /* fetch the successor before the descriptor is released */
      next_desc = desc->next;
      RF_Free(desc->buf, desc->size);
      RF_Free(desc, sizeof(*desc));
    }
    next_hdr = hdr->next;
    RF_Free(hdr, sizeof(*hdr));
  }
  chunklist = NULL;
  rf_mutex_destroy(&chunkmutex);
}
/*
 * Set up the memchunk module: clear the chunk list and the free lists,
 * initialize the module mutex and register rf_ShutdownMemChunk() on the
 * caller's shutdown list.
 *
 * listp - shutdown list the teardown callback is added to.
 *
 * Returns 0 on success or the nonzero code of the step that failed.  A
 * mutex initialization failure is returned right away: previously it was
 * only logged, the code went on to register a shutdown hook for a mutex
 * that was never initialized, and the error code was overwritten.
 */
int rf_ConfigureMemChunk(RF_ShutdownList_t **listp)
{
  int rc;

  chunklist = NULL;
  chunk_hdr_free_list = NULL;
  chunk_desc_free_list = NULL;

  rc = rf_mutex_init(&chunkmutex);
  if (rc) {
    RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    return(rc);
  }

  rc = rf_ShutdownCreate(listp, rf_ShutdownMemChunk, NULL);
  if (rc) {
    RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
      __LINE__, rc);
    /* the shutdown hook will never run, so release the mutex here */
    rf_mutex_destroy(&chunkmutex);
  }
  return(rc);
}
/* called to get a chunk descriptor for a newly-allocated chunk of memory
 * MUTEX MUST BE LOCKED
 *
 * The descriptor is taken from chunk_desc_free_list when that list is
 * non-empty and is allocated with RF_Malloc otherwise.  The free list is
 * not currently used: nothing in this file puts descriptors on it.
 *
 * size - size recorded in the descriptor
 * buf  - buffer the descriptor will refer to (not copied)
 *
 * Returns the descriptor with next cleared and reuse_count zeroed, or NULL
 * if RF_Malloc did not produce one (assumes RF_Malloc leaves the pointer
 * NULL on failure -- TODO confirm against rf_debugMem.h).  In that case buf
 * remains owned by the caller.
 */
static RF_ChunkDesc_t *NewMemChunk(int size, char *buf)
{
  RF_ChunkDesc_t *p;

  if (chunk_desc_free_list) {
    p = chunk_desc_free_list;
    chunk_desc_free_list = p->next;
  }
  else {
    RF_Malloc(p, sizeof(RF_ChunkDesc_t), (RF_ChunkDesc_t *));
    if (p == NULL) {
      return(NULL);
    }
  }
  p->size = size;
  p->buf = buf;
  p->next = NULL;
  p->reuse_count = 0;
  return(p);
}
/* looks for a chunk of memory of acceptable size. If none, allocates one and returns
 * a chunk descriptor for it, but does not install anything in the list. This is done
 * when the chunk is released.
 *
 * Only the first header whose size is >= the requested size is examined;
 * if it has an idle chunk, that chunk is unlinked and its reuse count is
 * bumped, so the chunk handed back may be larger than requested.  The
 * first 'size' bytes of the buffer are zeroed before returning.
 *
 * Returns the descriptor, or NULL if a fresh buffer or descriptor could not
 * be allocated (assumes RF_Malloc leaves the pointer NULL on failure --
 * TODO confirm against rf_debugMem.h).
 */
RF_ChunkDesc_t *rf_GetMemChunk(int size)
{
  RF_ChunkHdr_t *hdr;
  RF_ChunkDesc_t *p = NULL;
  char *buf;

  RF_LOCK_MUTEX(chunkmutex);
  /* chunklist is only read with the mutex held */
  for (hdr = chunklist; hdr; hdr = hdr->next) {
    if (hdr->size >= size) {
      p = hdr->list;
      if (p) {
        hdr->list = p->next;
        p->next = NULL;
        p->reuse_count++;
      }
      break;
    }
  }
  if (!p) {
    RF_Malloc(buf, size, (char *));
    if (buf != NULL) {
      p = NewMemChunk(size, buf);
      if (p == NULL) {
        /* no descriptor to carry the buffer: do not leak it */
        RF_Free(buf, size);
      }
    }
  }
  RF_UNLOCK_MUTEX(chunkmutex);
  if (p == NULL) {
    return(NULL);
  }
  (void) bzero(p->buf, size);
  return(p);
}
/*
 * Hand a chunk obtained from rf_GetMemChunk() back to the chunk list.
 *
 * The header list is kept ordered by ascending size.  If a header for
 * exactly chunk->size exists the chunk is pushed on its list; otherwise a
 * new header is allocated and linked in at the position that preserves the
 * ordering.
 *
 * If the new header cannot be allocated the chunk cannot be cached, so its
 * buffer and descriptor are freed instead of dereferencing a NULL header
 * (assumes RF_Malloc leaves the pointer NULL on failure -- TODO confirm
 * against rf_debugMem.h).  Either way the caller must not touch the chunk
 * after this call.
 */
void rf_ReleaseMemChunk(RF_ChunkDesc_t *chunk)
{
  RF_ChunkHdr_t *hdr, *prev = NULL, *newhdr;

  RF_LOCK_MUTEX(chunkmutex);
  /* stop at the first header that is not smaller than the chunk */
  for (hdr = chunklist; hdr && hdr->size < chunk->size; hdr = hdr->next) {
    prev = hdr;
  }
  if (hdr && hdr->size == chunk->size) {
    chunk->next = hdr->list;
    hdr->list = chunk;
  }
  else {
    RF_Malloc(newhdr, sizeof(RF_ChunkHdr_t), (RF_ChunkHdr_t *));
    if (newhdr == NULL) {
      RF_Free(chunk->buf, chunk->size);
      RF_Free(chunk, sizeof(*chunk));
    }
    else {
      newhdr->size = chunk->size;
      newhdr->list = chunk;
      chunk->next = NULL;
      /* hdr is prev's successor (or the old head), so it follows newhdr */
      newhdr->next = hdr;
      if (prev) {
        prev->next = newhdr;
      }
      else {
        chunklist = newhdr;
      }
    }
  }
  RF_UNLOCK_MUTEX(chunkmutex);
}

View File

@ -0,0 +1,79 @@
/* $NetBSD: rf_memchunk.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* header file for rf_memchunk.c. See comments there */
/* :
* Log: rf_memchunk.h,v
* Revision 1.8 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.7 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.6 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.5 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.4 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1995/12/01 19:25:56 root
* added copyright info
*
*/
#ifndef _RF__RF_MEMCHUNK_H_
#define _RF__RF_MEMCHUNK_H_
#include "rf_types.h"
/* Descriptor for one chunk of memory managed by rf_memchunk.c. */
struct RF_ChunkDesc_s {
int size; /* size the buffer was allocated with; also selects the chunk-list header */
int reuse_count; /* incremented each time rf_GetMemChunk() hands out this cached chunk */
char *buf; /* the memory itself */
RF_ChunkDesc_t *next; /* link on a header's list of idle chunks */
};
/* Reset the chunk lists, init the module mutex and register teardown on *listp; returns 0 or an error code. */
int rf_ConfigureMemChunk(RF_ShutdownList_t **listp);
/* Return a descriptor whose buffer holds at least 'size' bytes, the first 'size' of them zeroed. */
RF_ChunkDesc_t *rf_GetMemChunk(int size);
/* Put a chunk obtained from rf_GetMemChunk() on the chunk list for later reuse. */
void rf_ReleaseMemChunk(RF_ChunkDesc_t *chunk);
#endif /* !_RF__RF_MEMCHUNK_H_ */

View File

@ -0,0 +1,97 @@
/* $NetBSD: rf_netbsd.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Greg Oster
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _RF__RF_NETBSDSTUFF_H_
#define _RF__RF_NETBSDSTUFF_H_
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#if defined(__NetBSD__) && defined(_KERNEL)
/*
 * Vnode and device number recorded for one device.
 * NOTE(review): presumably one of these is kept per RAID component (the
 * disabled fields below refer to a component path) -- confirm against the
 * users of this structure.
 */
struct raidcinfo {
struct vnode *ci_vp; /* device's vnode */
dev_t ci_dev; /* XXX: device's dev_t */
/* The remaining fields are compiled out. */
#if 0
size_t ci_size; /* size */
char *ci_path; /* path to component */
size_t ci_pathlen; /* length of component path */
#endif
};
#endif
#endif /* _RF__RF_NETBSDSTUFF_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,453 @@
/* $NetBSD: rf_nwayxor.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland, Daniel Stodolsky
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/************************************************************
*
* nwayxor.c -- code to do N-way xors for reconstruction
*
* nWayXorN xors N input buffers into the destination buffer.
* adapted from danner's longword_bxor code.
*
************************************************************/
/* :
* Log: rf_nwayxor.c,v
* Revision 1.6 1996/06/12 03:31:18 jimz
* only print call counts if rf_showXorCallCounts != 0
*
* Revision 1.5 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.4 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.3 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.2 1995/12/01 19:29:14 root
* added copyright info
*
*/
#include "rf_nwayxor.h"
#include "rf_shutdown.h"
/*
 * callcount[N] counts the calls made to rf_nWayXorN (N = 1..9); slot 0 is
 * never incremented in this file.  Zeroed by rf_ConfigureNWayXor() and
 * printed by rf_ShutdownNWayXor().
 */
static int callcount[10];
/* Forward declaration of the shutdown-list callback. */
static void rf_ShutdownNWayXor(void *);
/*
 * Shutdown-list callback for the n-way xor module.  Prints the ten call
 * counters on one line, but only when rf_showXorCallCounts is nonzero.
 * The argument is not used.
 */
static void rf_ShutdownNWayXor(void *ignored)
{
  int slot;

  if (rf_showXorCallCounts != 0) {
    printf("Call counts for n-way xor routines: ");
    slot = 0;
    while (slot < 10) {
      printf("%d ", callcount[slot]);
      slot++;
    }
    printf("\n");
  }
}
/*
 * Set up the n-way xor module: zero all ten call counters and register
 * rf_ShutdownNWayXor() on the caller's shutdown list.
 *
 * listp - shutdown list the callback is added to.
 *
 * Returns whatever rf_ShutdownCreate() returns.
 */
int rf_ConfigureNWayXor(RF_ShutdownList_t **listp)
{
  int slot = 0;

  while (slot < 10) {
    callcount[slot] = 0;
    slot++;
  }
  return(rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL));
}
/*
 * XOR one source buffer into the destination buffer:
 *   dest[i] ^= src_rbs[0]->buffer[i]   for i in [0, len)
 *
 * src_rbs - array of source reconstruction buffers; only entry 0 is used
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Words are handled four at a time while at least four remain; all eight
 * loads of a group are done before the four stores.  Whatever is left is
 * handled one word at a time up to the end pointer computed on entry.
 */
void rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *in = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *out = (unsigned long *) dest_rb->buffer;
  register unsigned long *stop = in + len;
  register unsigned long o0, o1, o2, o3, i0, i1, i2, i3;

  callcount[1]++;

  for (; len >= 4; len -= 4, in += 4, out += 4) {
    o0 = out[0];
    o1 = out[1];
    o2 = out[2];
    o3 = out[3];
    i0 = in[0];
    i1 = in[1];
    i2 = in[2];
    i3 = in[3];
    out[0] = o0 ^ i0;
    out[1] = o1 ^ i1;
    out[2] = o2 ^ i2;
    out[3] = o3 ^ i3;
  }

  /* remaining 0..3 words */
  while (in < stop) {
    *out ^= *in;
    out++;
    in++;
  }
}
/*
 * XOR two source buffers into the destination buffer:
 *   dest[i] = dest[i] ^ src_rbs[0]->buffer[i] ^ src_rbs[1]->buffer[i]
 *
 * src_rbs - array of source reconstruction buffers; entries 0 and 1 are used
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * The destination is first advanced word by word until its address is a
 * multiple of 32, then words are processed four at a time, then the rest
 * one at a time.  Every loop is bounded by len: the alignment loop used to
 * ignore len, so a buffer shorter than the distance to the next 32-byte
 * boundary (or a destination that never reaches one) drove len negative
 * and the final loop ran far past the end of the buffers.
 */
void rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *a = dst;   /* read cursor over the destination */
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[2]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ = *a++ ^ *b++ ^ *c++;
    len--;
  }
  while (len >= 4) {
    a0 = a[0]; len -= 4;

    a1 = a[1];
    a2 = a[2];

    a3 = a[3]; a += 4;

    b0 = b[0];
    b1 = b[1];

    b2 = b[2];
    b3 = b[3];
    /* start dual issue */
    a0 ^= b0; b0 = c[0];

    b += 4; a1 ^= b1;

    a2 ^= b2; a3 ^= b3;

    b1 = c[1]; a0 ^= b0;

    b2 = c[2]; a1 ^= b1;

    b3 = c[3]; a2 ^= b2;

    dst[0] = a0; a3 ^= b3;
    dst[1] = a1; c += 4;
    dst[2] = a2;
    dst[3] = a3; dst += 4;
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ = *a++ ^ *b++ ^ *c++;
    len--;
  }
}
/*
 * Helper macros for the unrolled loops of rf_nWayXor3 .. rf_nWayXor9.
 * Each expands to several plain statements (no do/while wrapper) and relies
 * on the locals a0..a3, b0..b3 and, for LOAD_FIRST, len being declared in
 * the function that uses it.  Arguments are pointer variables and are
 * evaluated more than once.
 */
/* Loads four words from _dst into a0..a3 and four from _b into b0..b3, and
 * subtracts 4 from len.
 * note that first arg is not incremented but 2nd arg is */
#define LOAD_FIRST(_dst,_b) \
a0 = _dst[0]; len -= 4; \
a1 = _dst[1]; \
a2 = _dst[2]; \
a3 = _dst[3]; \
b0 = _b[0]; \
b1 = _b[1]; \
b2 = _b[2]; \
b3 = _b[3]; _b += 4;
/* Folds b0..b3 into a0..a3, then reloads b0..b3 with the next four words
 * from _n.
 * note: arg is incremented */
#define XOR_AND_LOAD_NEXT(_n) \
a0 ^= b0; b0 = _n[0]; \
a1 ^= b1; b1 = _n[1]; \
a2 ^= b2; b2 = _n[2]; \
a3 ^= b3; b3 = _n[3]; \
_n += 4;
/* Folds b0..b3 into a0..a3 and stores the four results to _dst.
 * arg is incremented */
#define XOR_AND_STORE(_dst) \
a0 ^= b0; _dst[0] = a0; \
a1 ^= b1; _dst[1] = a1; \
a2 ^= b2; _dst[2] = a2; \
a3 ^= b3; _dst[3] = a3; \
_dst += 4;
/*
 * XOR three source buffers (src_rbs[0..2]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[3]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++;
    len--;
  }
}
/*
 * XOR four source buffers (src_rbs[0..3]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[4]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
    len--;
  }
}
/*
 * XOR five source buffers (src_rbs[0..4]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[5]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_LOAD_NEXT(f);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
    len--;
  }
}
/*
 * XOR six source buffers (src_rbs[0..5]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
  register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[6]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_LOAD_NEXT(f);
    XOR_AND_LOAD_NEXT(g);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
    len--;
  }
}
/*
 * XOR seven source buffers (src_rbs[0..6]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
  register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
  register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[7]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_LOAD_NEXT(f);
    XOR_AND_LOAD_NEXT(g);
    XOR_AND_LOAD_NEXT(h);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
    len--;
  }
}
/*
 * XOR eight source buffers (src_rbs[0..7]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
  register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
  register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
  register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[8]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_LOAD_NEXT(f);
    XOR_AND_LOAD_NEXT(g);
    XOR_AND_LOAD_NEXT(h);
    XOR_AND_LOAD_NEXT(i);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
    len--;
  }
}
/*
 * XOR nine source buffers (src_rbs[0..8]->buffer) into dest_rb->buffer.
 *
 * src_rbs - array of source reconstruction buffers
 * dest_rb - destination reconstruction buffer, updated in place
 * len     - number of unsigned long words (not bytes) to process
 *
 * Word-at-a-time until the destination address is a multiple of 32, then
 * four words per iteration, then the remainder.  Every loop is bounded by
 * len; the alignment loop previously was not, which let len go negative
 * and the final loop overrun the buffers.
 */
void rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
{
  register unsigned long *dst = (unsigned long *) dest_rb->buffer;
  register unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
  register unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
  register unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
  register unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
  register unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
  register unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
  register unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
  register unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
  register unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
  unsigned long a0,a1,a2,a3, b0,b1,b2,b3;

  callcount[9]++;
  /* align dest to cache line */
  while (len > 0 && (((unsigned long) dst) & 0x1f)) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
    len--;
  }
  while (len >= 4) {
    LOAD_FIRST(dst,b);
    XOR_AND_LOAD_NEXT(c);
    XOR_AND_LOAD_NEXT(d);
    XOR_AND_LOAD_NEXT(e);
    XOR_AND_LOAD_NEXT(f);
    XOR_AND_LOAD_NEXT(g);
    XOR_AND_LOAD_NEXT(h);
    XOR_AND_LOAD_NEXT(i);
    XOR_AND_LOAD_NEXT(j);
    XOR_AND_STORE(dst);
  }
  /* remaining 0..3 words */
  while (len > 0) {
    *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
    len--;
  }
}

View File

@ -0,0 +1,74 @@
/* $NetBSD: rf_nwayxor.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* rf_nwayxor.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* rf_nwayxor.h -- types and prototypes for nwayxor module
*/
/*
* :
* Log: rf_nwayxor.h,v
* Revision 1.4 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.3 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.2 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.1 1996/05/18 19:56:47 jimz
* Initial revision
*
*/
#ifndef _RF__RF_NWAYXOR_H_
#define _RF__RF_NWAYXOR_H_
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_reconstruct.h"
/*
 * Configuration entry point for the n-way XOR module.  Receives the address
 * of a shutdown-list pointer and returns an int status.
 * NOTE(review): presumably returns 0 on success and may register cleanup
 * work on *listp -- confirm against rf_nwayxor.c.
 */
int rf_ConfigureNWayXor(RF_ShutdownList_t **listp);
/*
 * rf_nWayXor1 .. rf_nWayXor9 all share one signature: an array of source
 * reconstruction buffers, a single destination reconstruction buffer and a
 * length.
 * NOTE(review): by their names these presumably XOR N source buffers into
 * dest_rb; the unit of 'len' (bytes vs. words) is not established by this
 * header -- confirm against the implementations before relying on it.
 */
void rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
void rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len);
#endif /* !_RF__RF_NWAYXOR_H_ */

View File

@ -0,0 +1,84 @@
/* $NetBSD: rf_options.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* rf_options.c
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifdef _KERNEL
#define KERNEL
#endif
#ifdef KERNEL
#ifndef __NetBSD__
#include <dfstrace.h>
#endif /* !__NetBSD__ */
#endif /* KERNEL */
#include "rf_threadstuff.h"
#include "rf_types.h"
#include "rf_archs.h"
#include "rf_general.h"
#include "rf_options.h"
/*
 * Pass 1 over rf_optnames.h: define the storage for every debug option.
 *
 * rf_options.h (included above) leaves RF_DBG_OPTION defined as an
 * "extern long rf_<option>;" declaration, so it is dropped here and
 * redefined to emit "long rf_<option> = <default>;".  Including
 * rf_optnames.h then expands to one initialized global per option.
 */
#ifdef RF_DBG_OPTION
#undef RF_DBG_OPTION
#endif /* RF_DBG_OPTION */
#ifdef __STDC__
#define RF_DBG_OPTION(_option_,_defval_) long rf_##_option_ = _defval_;
#else /* __STDC__ */
/* non-ANSI preprocessors: the empty comment is used to glue the two tokens */
#define RF_DBG_OPTION(_option_,_defval_) long rf_/**/_option_ = _defval_;
#endif /* __STDC__ */
#include "rf_optnames.h"
/*
 * Pass 2 over rf_optnames.h: build the name -> variable lookup table.
 *
 * RF_DBG_OPTION is redefined to emit one "{ "<option>", &rf_<option> },"
 * initializer per option, so rf_debugNames[] pairs each option's name string
 * with the address of the long defined for it above.  The table is
 * terminated by a {NULL, NULL} sentinel entry.
 */
#undef RF_DBG_OPTION
#ifdef __STDC__
#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_##_option_ },
#else /* __STDC__ */
#define RF_DBG_OPTION(_option_,_defval_) { RF_STRING(_option_), &rf_/**/_option_ },
#endif /* __STDC__ */
RF_DebugName_t rf_debugNames[] = {
#include "rf_optnames.h"
{NULL, NULL}
};
#undef RF_DBG_OPTION
/*
 * Pass 3 over rf_optnames.h: RF_DBG_OPTION now expands to the assignment
 * "rf_<option> = <default>;", so including rf_optnames.h inside a function
 * body yields one assignment statement per option.
 */
#ifdef __STDC__
#define RF_DBG_OPTION(_option_,_defval_) rf_##_option_ = _defval_ ;
#else /* __STDC__ */
#define RF_DBG_OPTION(_option_,_defval_) rf_/**/_option_ = _defval_ ;
#endif /* __STDC__ */
/*
 * rf_ResetDebugOptions -- set every debug option variable back to the
 * default value listed for it in rf_optnames.h.
 *
 * Takes no arguments and returns nothing.  The parameter list is spelled
 * (void) so the definition agrees with the prototype in rf_options.h.
 */
void rf_ResetDebugOptions(void)
{
#include "rf_optnames.h"
}

View File

@ -0,0 +1,67 @@
/* $NetBSD: rf_options.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* rf_options.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifndef _RF__RF_OPTIONS_H_
#define _RF__RF_OPTIONS_H_
#ifdef _KERNEL
#define KERNEL
#endif
#ifdef KERNEL
#ifndef __NetBSD__
#include <dfstrace.h>
#endif /* !__NetBSD__ */
#endif /* KERNEL */
/* default value given to the lockTableSize debug option in rf_optnames.h */
#define RF_DEFAULT_LOCK_TABLE_SIZE 256
/*
 * One entry of the debug-option lookup table: the option's name and a
 * pointer to the long variable that holds its current value.
 */
typedef struct RF_DebugNames_s {
char *name;
long *ptr;
} RF_DebugName_t;
/* table of all debug options; defined in rf_options.c */
extern RF_DebugName_t rf_debugNames[];
/*
 * Declare every debug option variable: RF_DBG_OPTION is defined to emit
 * "extern long rf_<option>;" and rf_optnames.h is then expanded with it.
 * The macro is intentionally left defined afterwards; files that need a
 * different expansion #undef it first.
 */
#ifdef RF_DBG_OPTION
#undef RF_DBG_OPTION
#endif /* RF_DBG_OPTION */
#ifdef __STDC__
#define RF_DBG_OPTION(_option_,_defval_) extern long rf_##_option_;
#else /* __STDC__ */
#define RF_DBG_OPTION(_option_,_defval_) extern long rf_/**/_option_;
#endif /* __STDC__ */
#include "rf_optnames.h"
/* restore every debug option to its default value from rf_optnames.h */
void rf_ResetDebugOptions(void);
#endif /* !_RF__RF_OPTIONS_H_ */

View File

@ -0,0 +1,143 @@
/* $NetBSD: rf_optnames.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* rf_optnames.h
*/
/*
* Copyright (c) 1996 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Jim Zelenka
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Don't protect against multiple inclusion here- we actually want this.
*/
#ifdef _KERNEL
#define KERNEL
#endif
/*
 * Master list of debug options, one RF_DBG_OPTION(name, default) per option.
 * The including file defines RF_DBG_OPTION before each inclusion to choose
 * what every entry expands to (declaration, definition, table row, reset).
 * Entries must not be followed by a semicolon or comma; the macro supplies
 * its own punctuation.
 */
RF_DBG_OPTION(accSizeKB,0) /* if nonzero, the fixed access size to run */
RF_DBG_OPTION(accessDebug,0)
RF_DBG_OPTION(accessTraceBufSize,0)
RF_DBG_OPTION(alignAccesses,0) /* whether accs should be aligned to their size */
RF_DBG_OPTION(camlayerIOs,0)
RF_DBG_OPTION(camlayerDebug,0) /* debug CAM activity */
RF_DBG_OPTION(cscanDebug,0) /* debug CSCAN sorting */
RF_DBG_OPTION(dagDebug,0)
RF_DBG_OPTION(debugPrintUseBuffer,0)
RF_DBG_OPTION(degDagDebug,0)
RF_DBG_OPTION(disableAsyncAccs,0)
RF_DBG_OPTION(diskDebug,0)
RF_DBG_OPTION(doDebug,0)
RF_DBG_OPTION(dtDebug,0)
RF_DBG_OPTION(enableAtomicRMW,0) /* this debug var enables locking of the disk
* arm during small-write operations. Setting
* this variable to anything other than 0 will
* result in deadlock. (wvcii)
*/
RF_DBG_OPTION(engineDebug,0)
RF_DBG_OPTION(fifoDebug,0) /* debug fifo queueing */
RF_DBG_OPTION(floatingRbufDebug,0)
RF_DBG_OPTION(forceHeadSepLimit,-1)
RF_DBG_OPTION(forceNumFloatingReconBufs,-1) /* wire down number of extra recon buffers to use */
RF_DBG_OPTION(keepAccTotals,0) /* turn on keep_acc_totals */
RF_DBG_OPTION(lockTableSize,RF_DEFAULT_LOCK_TABLE_SIZE)
RF_DBG_OPTION(mapDebug,0)
RF_DBG_OPTION(maxNumTraces,-1)
RF_DBG_OPTION(maxRandomSizeKB,128) /* if rf_accSizeKB==0, acc sizes are uniform in [ (1/2)..maxRandomSizeKB ] */
RF_DBG_OPTION(maxTraceRunTimeSec,0)
RF_DBG_OPTION(memAmtDebug,0) /* trace amount of memory allocated */
RF_DBG_OPTION(memChunkDebug,0)
RF_DBG_OPTION(memDebug,0)
RF_DBG_OPTION(memDebugAddress,0)
RF_DBG_OPTION(numBufsToAccumulate,1) /* number of buffers to accumulate before doing XOR */
RF_DBG_OPTION(prReconSched,0)
RF_DBG_OPTION(printDAGsDebug,0)
RF_DBG_OPTION(printStatesDebug,0)
RF_DBG_OPTION(protectedSectors,64L) /* # of sectors at start of disk to
exclude from RAID address space */
RF_DBG_OPTION(pssDebug,0)
RF_DBG_OPTION(queueDebug,0)
RF_DBG_OPTION(quiesceDebug,0)
RF_DBG_OPTION(raidSectorOffset,0) /* added to all incoming sectors to
debug alignment problems */
RF_DBG_OPTION(reconDebug,0)
RF_DBG_OPTION(reconbufferDebug,0)
RF_DBG_OPTION(rewriteParityStripes,0) /* debug flag that causes parity rewrite at startup */
RF_DBG_OPTION(scanDebug,0) /* debug SCAN sorting */
RF_DBG_OPTION(showXorCallCounts,0) /* show n-way Xor call counts */
RF_DBG_OPTION(shutdownDebug,0) /* show shutdown calls */
RF_DBG_OPTION(sizePercentage,100)
RF_DBG_OPTION(sstfDebug,0) /* turn on debugging info for sstf queueing */
RF_DBG_OPTION(stripeLockDebug,0)
RF_DBG_OPTION(suppressLocksAndLargeWrites,0)
RF_DBG_OPTION(suppressTraceDelays,0)
RF_DBG_OPTION(testDebug,0)
RF_DBG_OPTION(useMemChunks,1)
RF_DBG_OPTION(validateDAGDebug,0)
RF_DBG_OPTION(validateVisitedDebug,1) /* XXX turn to zero by default? */
RF_DBG_OPTION(verifyParityDebug,0)
RF_DBG_OPTION(warnLongIOs,0)
/* options that exist only in kernel builds */
#ifdef KERNEL
RF_DBG_OPTION(debugKernelAccess,0) /* DoAccessKernel debugging */
#endif /* KERNEL */
/* options that exist only in non-kernel (user-level) builds */
#ifndef KERNEL
RF_DBG_OPTION(disableParityVerify,0) /* suppress verification of parity */
RF_DBG_OPTION(interactiveScript,0) /* set as a debug option for now */
RF_DBG_OPTION(looptestShowWrites,0) /* user-level loop test write debugging */
RF_DBG_OPTION(traceDebug,0)
#endif /* !KERNEL */
/* options that exist only when SIMULATE is defined */
#ifdef SIMULATE
RF_DBG_OPTION(addrSizePercentage,100)
RF_DBG_OPTION(diskTrace,0) /* used to turn the timing traces on and off */
RF_DBG_OPTION(eventDebug,0)
RF_DBG_OPTION(mWactive,1500)
RF_DBG_OPTION(mWidle,625)
RF_DBG_OPTION(mWsleep,15)
RF_DBG_OPTION(mWspinup,3500)
#endif /* SIMULATE */
/* options that exist only when parity logging is compiled in */
#if RF_INCLUDE_PARITYLOGGING > 0
RF_DBG_OPTION(forceParityLogReint,0)
RF_DBG_OPTION(numParityRegions,0) /* number of regions in the array */
RF_DBG_OPTION(numReintegrationThreads,1)
RF_DBG_OPTION(parityLogDebug,0) /* if nonzero, enables debugging of parity logging */
RF_DBG_OPTION(totalInCoreLogCapacity,1024*1024) /* target bytes available for in-core logs */
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */
#if DFSTRACE > 0
RF_DBG_OPTION(DFSTraceAccesses,0)
#endif /* DFSTRACE > 0 */
/* options that exist only when demo mode is compiled in */
#if RF_DEMO > 0
RF_DBG_OPTION(demoMeterHpos,0) /* horizontal position of meters for demo mode */
RF_DBG_OPTION(demoMeterTag,0)
RF_DBG_OPTION(demoMeterVpos,0) /* vertical position of meters for demo mode */
RF_DBG_OPTION(demoMode,0)
RF_DBG_OPTION(demoSMM,0)
RF_DBG_OPTION(demoSuppressReconInitVerify,0) /* suppress initialization & verify for recon */
#endif /* RF_DEMO > 0 */

View File

@ -0,0 +1,74 @@
/* $NetBSD: rf_owner.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Mark Holland
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* :
* Log: rf_owner.h,v
* Revision 1.8 1996/08/20 14:36:51 jimz
* add bufLen to RF_EventCreate_t to be able to include buffer length
* when freeing buffer
*
* Revision 1.7 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.6 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.5 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.4 1995/12/01 19:44:30 root
* added copyright info
*
*/
#ifndef _RF__RF_OWNER_H_
#define _RF__RF_OWNER_H_
#include "rf_types.h"
/*
 * Per-owner bookkeeping record (typedef'd elsewhere as RF_OwnerInfo_t).
 * NOTE(review): this header does not say what an "owner" is or how the
 * fields are used; the notes below are read from the names only and should
 * be confirmed against the code that fills this struct in.
 */
struct RF_OwnerInfo_s {
RF_RaidAccessDesc_t *desc; /* access descriptor associated with this owner */
int owner; /* owner identifier -- TODO confirm meaning */
double last_start; /* presumably a start timestamp -- TODO confirm units */
int done; /* flag -- TODO confirm when set */
int notFirst; /* flag -- TODO confirm when set */
};
/*
 * Bundle of arguments describing an event to create: the array, a script,
 * the owner record, and a data buffer together with its length.
 */
struct RF_EventCreate_s {
RF_Raid_t *raidPtr; /* array the event applies to */
RF_Script_t *script;
RF_OwnerInfo_t *ownerInfo;
char *bufPtr; /* data buffer */
int bufLen; /* length of bufPtr; per the revision log, kept so the
* length is available when the buffer is freed */
};
#endif /* !_RF__RF_OWNER_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,224 @@
/* $NetBSD: rf_paritylog.h,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* header file for parity log
*
* :
* Log: rf_paritylog.h,v
* Revision 1.21 1996/07/17 21:00:58 jimz
* clean up timer interface, tracing
*
* Revision 1.20 1996/07/15 17:22:18 jimz
* nit-pick code cleanup
* resolve stdlib problems on DEC OSF
*
* Revision 1.19 1996/06/11 10:17:57 jimz
* definitions and run state for parity logging thread
*
* Revision 1.18 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.17 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.16 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.15 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.14 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.13 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.12 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.11 1995/12/06 20:54:58 wvcii
* added prototyping
*
* Revision 1.10 1995/11/30 16:05:50 wvcii
* added copyright info
*
* Revision 1.9 1995/10/07 05:09:27 wvcii
* removed #define BYTESPERSECTOR 512
*
* Revision 1.8 1995/09/06 19:27:52 wvcii
* added startTime to commonLogData
*
* Revision 1.7 1995/07/07 00:13:42 wvcii
* this version free from deadlock, fails parity verification
*
*/
#ifndef _RF__RF_PARITYLOG_H_
#define _RF__RF_PARITYLOG_H_
#include "rf_types.h"
/* default number of sectors in one parity log */
#define RF_DEFAULT_NUM_SECTORS_PER_LOG 64
/* identifier of a parity-logging region */
typedef int RF_RegionId_t;
/*
 * Kind of record stored in a parity log.
 * NOTE(review): the meaning of each value is not spelled out in this header;
 * by name, RF_UPDATE and RF_OVERWRITE distinguish parity-update records from
 * parity-overwrite records and RF_STOP is a marker -- confirm in
 * rf_paritylog.c.
 */
typedef enum RF_ParityRecordType_e {
RF_STOP,
RF_UPDATE,
RF_OVERWRITE
} RF_ParityRecordType_t;
/*
 * Information shared by one or more RF_ParityLogData_s structs (each of
 * which points here through its 'common' field).
 */
struct RF_CommonLogData_s {
RF_DECLARE_MUTEX(mutex) /* protects cnt */
int cnt; /* when 0, time to call wakeFunc */
RF_Raid_t *raidPtr;
/* int (*wakeFunc)(struct buf *); */
int (*wakeFunc)(RF_DagNode_t *node, int status); /* called once cnt reaches 0 */
void *wakeArg; /* NOTE(review): presumably the argument handed to wakeFunc -- confirm */
RF_AccTraceEntry_t *tracerec; /* access trace record */
RF_Etimer_t startTime; /* timer value; see the tracing code for how it is used */
caddr_t bufPtr; /* data buffer; RF_ParityLogData_s.bufOffset is relative to this */
RF_ParityRecordType_t operation; /* record type being logged */
RF_CommonLogData_t *next; /* list link */
};
/*
 * One piece of parity log data confined to a single region.  Several of
 * these may share one RF_CommonLogData_s; they are kept on doubly linked
 * lists through next/prev.
 */
struct RF_ParityLogData_s {
RF_RegionId_t regionID; /* this struct guaranteed to span a single region */
int bufOffset; /* offset from common->bufPtr */
RF_PhysDiskAddr_t diskAddress; /* physical disk address this data refers to */
RF_CommonLogData_t *common; /* info shared by one or more parityLogData structs */
RF_ParityLogData_t *next; /* forward list link */
RF_ParityLogData_t *prev; /* backward list link */
};
/*
 * Append queue for parity logs.  It currently carries only a mutex; no
 * queue storage is declared here.
 */
struct RF_ParityLogAppendQueue_s {
RF_DECLARE_MUTEX(mutex)
};
/*
 * One record in a parity log: a parity disk address and the type of
 * operation recorded for it.  Same layout as RF_DiskMap_s.
 */
struct RF_ParityLogRecord_s {
RF_PhysDiskAddr_t parityAddr; /* physical address of the parity concerned */
RF_ParityRecordType_t operation; /* record type */
};
/*
 * An in-core parity log belonging to one region: an array of records plus
 * the data buffer that is written to the region's log on disk.
 */
struct RF_ParityLog_s {
RF_RegionId_t regionID; /* region this log belongs to */
int numRecords; /* NOTE(review): presumably the count of valid entries in records[] -- confirm */
int diskOffset; /* offset within the region's disk log at which this log is written */
RF_ParityLogRecord_t *records; /* record array */
caddr_t bufPtr; /* log data buffer */
RF_ParityLog_t *next; /* list link */
};
/* A mutex-guarded singly linked list of parity logs. */
struct RF_ParityLogQueue_s {
RF_DECLARE_MUTEX(mutex)
RF_ParityLog_t *parityLogs; /* head of the list (linked through RF_ParityLog_s.next) */
};
/*
 * Pool of region-sized buffers kept in a circular array.  Buffers are taken
 * from buffers[availBuffersIndex] and returned to buffers[emptyBuffersIndex];
 * both indices wrap to 0 when they reach totalBuffers.
 */
struct RF_RegionBufferQueue_s {
RF_DECLARE_MUTEX(mutex)
RF_DECLARE_COND(cond) /* signalled when a buffer is returned to the pool */
int bufferSize;
int totalBuffers; /* size of array 'buffers' */
int availableBuffers; /* num available 'buffers' */
int emptyBuffersIndex; /* stick next freed buffer here */
int availBuffersIndex; /* grab next buffer from here */
caddr_t *buffers; /* array buffers used to hold parity */
};
/* bit flags stored in RF_ParityLogDiskQueue_s.threadState */
#define RF_PLOG_CREATED (1<<0) /* thread is created */
#define RF_PLOG_RUNNING (1<<1) /* thread is running */
#define RF_PLOG_TERMINATE (1<<2) /* thread is terminated (should exit) */
#define RF_PLOG_SHUTDOWN (1<<3) /* thread is aware and exiting/exited */
/*
 * Work queues and free lists for the parity-logging disk thread, plus the
 * thread's run state.  Everything here is protected by 'mutex'.
 */
struct RF_ParityLogDiskQueue_s {
RF_DECLARE_MUTEX(mutex) /* protects all vars in this struct */
RF_DECLARE_COND(cond)
int threadState; /* is thread running, should it shutdown (see above) */
RF_ParityLog_t *flushQueue; /* list of parity logs to be flushed to log disk */
RF_ParityLog_t *reintQueue; /* list of parity logs waiting to be reintegrated */
RF_ParityLogData_t *bufHead; /* head of FIFO list of log data, waiting on a buffer */
RF_ParityLogData_t *bufTail; /* tail of FIFO list of log data, waiting on a buffer */
RF_ParityLogData_t *reintHead; /* head of FIFO list of log data, waiting on reintegration */
RF_ParityLogData_t *reintTail; /* tail of FIFO list of log data, waiting on reintegration */
RF_ParityLogData_t *logBlockHead; /* queue of work, blocked until a log is available */
RF_ParityLogData_t *logBlockTail; /* tail of the logBlockHead queue */
RF_ParityLogData_t *reintBlockHead; /* queue of work, blocked until reintegration is complete */
RF_ParityLogData_t *reintBlockTail; /* tail of the reintBlockHead queue */
RF_CommonLogData_t *freeCommonList; /* list of unused common data structs */
RF_ParityLogData_t *freeDataList; /* list of unused log data structs */
};
/*
 * One entry of a region's in-core disk map (see RF_RegionInfo_s.diskMap):
 * a parity disk address and the type of operation logged for it.
 */
struct RF_DiskMap_s {
RF_PhysDiskAddr_t parityAddr; /* physical address of the parity concerned */
RF_ParityRecordType_t operation; /* record type */
};
/*
 * Per-region state for parity logging: where the region's log and parity
 * live on disk, how much has been logged, and the region's current in-core
 * log.  Two mutexes guard disjoint sets of fields, as noted on each.
 */
struct RF_RegionInfo_s {
RF_DECLARE_MUTEX(mutex) /* protects: diskCount, diskMap, loggingEnabled, coreLog */
RF_DECLARE_MUTEX(reintMutex) /* protects: reintInProgress */
int reintInProgress; /* flag used to suspend flushing operations */
RF_SectorCount_t capacity; /* capacity of this region in sectors */
RF_SectorNum_t regionStartAddr; /* starting disk address for this region */
RF_SectorNum_t parityStartAddr; /* starting disk address for this region
* NOTE(review): same wording as regionStartAddr;
* presumably the start of the parity covered by
* this region -- confirm */
RF_SectorCount_t numSectorsParity; /* number of parity sectors protected by this region */
RF_SectorCount_t diskCount; /* num of sectors written to this region's disk log */
RF_DiskMap_t *diskMap; /* in-core map of what's in this region's disk log */
int loggingEnabled; /* logging enable for this region */
RF_ParityLog_t *coreLog; /* in-core log for this region */
};
/*
 * Public entry points of the parity log module.  The implementations are
 * not part of this header; the notes below restate only what the signatures
 * show, and anything further is marked for confirmation.
 */
/* Build parity log data for 'operation' on 'pda' using buffer 'bufPtr';
 * wakeFunc/wakeArg/tracerec/startTime mirror the RF_CommonLogData_s fields
 * of the same names. */
RF_ParityLogData_t *rf_CreateParityLogData(RF_ParityRecordType_t operation,
RF_PhysDiskAddr_t *pda, caddr_t bufPtr, RF_Raid_t *raidPtr,
int (*wakeFunc)(RF_DagNode_t *node, int status),
void *wakeArg, RF_AccTraceEntry_t *tracerec,
RF_Etimer_t startTime);
/* NOTE(review): by name, finds and removes log data for 'regionID' from the
 * list given by *head / *tail; the role of ignoreLocks is not visible here
 * -- confirm. */
RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr,
RF_RegionId_t regionID, RF_ParityLogData_t **head,
RF_ParityLogData_t **tail, int ignoreLocks);
/* NOTE(review): by name, hands back a list of parity logs starting at
 * firstLog -- confirm. */
void rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog);
/* NOTE(review): by name, appends logData to a parity log; the meaning of
 * finish, incomingLog, clearReintFlag and of the int result is not visible
 * here -- confirm. */
int rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish,
RF_ParityLog_t **incomingLog, int clearReintFlag);
/* NOTE(review): by name, turns parity logging on for the array -- confirm. */
void rf_EnableParityLogging(RF_Raid_t *raidPtr);
#endif /* !_RF__RF_PARITYLOG_H_ */

View File

@ -0,0 +1,789 @@
/* $NetBSD: rf_paritylogDiskMgr.c,v 1.1 1998/11/13 04:20:31 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* Code for flushing and reintegration operations related to parity logging.
*
* :
* Log: rf_paritylogDiskMgr.c,v
* Revision 1.25 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.24 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.23 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.22 1996/06/11 10:17:33 jimz
* Put in thread startup/shutdown mechanism for proper synchronization
* with start and end of day routines.
*
* Revision 1.21 1996/06/09 02:36:46 jimz
* lots of little crufty cleanup- fixup whitespace
* issues, comment #ifdefs, improve typing in some
* places (esp size-related)
*
* Revision 1.20 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.19 1996/06/05 18:06:02 jimz
* Major code cleanup. The Great Renaming is now done.
* Better modularity. Better typing. Fixed a bunch of
* synchronization bugs. Made a lot of global stuff
* per-desc or per-array. Removed dead code.
*
* Revision 1.18 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.17 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.16 1996/05/30 23:22:16 jimz
* bugfixes of serialization, timing problems
* more cleanup
*
* Revision 1.15 1996/05/30 12:59:18 jimz
* make etimer happier, more portable
*
* Revision 1.14 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.13 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.12 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.11 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.10 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.9 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.8 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.7 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.6 1995/12/06 20:58:27 wvcii
* added prototypes
*
* Revision 1.5 1995/11/30 16:06:05 wvcii
* added copyright info
*
* Revision 1.4 1995/10/09 22:41:10 wvcii
* minor bug fix
*
* Revision 1.3 1995/10/08 20:43:47 wvcii
* lots of random debugging - debugging still incomplete
*
* Revision 1.2 1995/09/07 15:52:19 jimz
* noop compile when INCLUDE_PARITYLOGGING not defined
*
* Revision 1.1 1995/09/06 19:24:44 wvcii
* Initial revision
*
*/
#include "rf_archs.h"
#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_types.h"
#include "rf_threadstuff.h"
#include "rf_mcpair.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_desc.h"
#include "rf_layout.h"
#include "rf_diskqueue.h"
#include "rf_paritylog.h"
#include "rf_general.h"
#include "rf_threadid.h"
#include "rf_etimer.h"
#include "rf_paritylogging.h"
#include "rf_engine.h"
#include "rf_dagutils.h"
#include "rf_map.h"
#include "rf_parityscan.h"
#include "rf_sys.h"
#include "rf_paritylogDiskMgr.h"
static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *);
/*
 * AcquireReintBuffer -- take a region buffer from the free list (pool).
 *
 * pool: buffer pool to take from; pool->mutex is acquired and released here.
 *
 * Returns the buffer that was stored at pool->availBuffersIndex, after
 * advancing that index (wrapping at pool->totalBuffers) and decrementing
 * pool->availableBuffers.
 *
 * If the free list is empty, WAIT on pool->cond until a buffer has been
 * returned.  The predicate is re-checked after every wakeup, so the function
 * always returns a real buffer and always leaves pool->mutex unlocked.
 * (Assumes RF_WAIT_COND returns with the mutex held, as condition waits
 * conventionally do -- confirm in rf_threadstuff.h.)
 *
 * BLOCKING
 */
static caddr_t AcquireReintBuffer(
  RF_RegionBufferQueue_t  *pool)
{
  caddr_t bufPtr;

  RF_LOCK_MUTEX(pool->mutex);
  while (pool->availableBuffers <= 0) {
    RF_PANIC(); /* should never happen in current config, single reint */
    RF_WAIT_COND(pool->cond, pool->mutex);
  }
  bufPtr = pool->buffers[pool->availBuffersIndex];
  pool->availableBuffers--;
  pool->availBuffersIndex++;
  if (pool->availBuffersIndex == pool->totalBuffers)
    pool->availBuffersIndex = 0;
  RF_UNLOCK_MUTEX(pool->mutex);
  return(bufPtr);
}
/*
 * ReleaseReintBuffer -- give a region buffer back to the free list.
 *
 * pool:   buffer pool receiving the buffer
 * bufPtr: buffer being returned
 *
 * Stores bufPtr in the slot named by pool->emptyBuffersIndex, advances that
 * index (wrapping to 0 at pool->totalBuffers), bumps the available count,
 * and signals pool->cond once the mutex has been dropped.
 *
 * NON-BLOCKING
 */
static void ReleaseReintBuffer(
  RF_RegionBufferQueue_t  *pool,
  caddr_t                  bufPtr)
{
  int slot;

  RF_LOCK_MUTEX(pool->mutex);
  slot = pool->emptyBuffersIndex;
  pool->buffers[slot] = bufPtr;
  slot++;
  pool->emptyBuffersIndex = (slot == pool->totalBuffers) ? 0 : slot;
  pool->availableBuffers++;
  RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
  RF_UNLOCK_MUTEX(pool->mutex);
  RF_SIGNAL_COND(pool->cond);
}
/*
 * ReadRegionLog -- initiate the read of a region log from disk.  Once the
 * DAG has been dispatched, return to the calling routine.
 *
 * regionID:      region whose log is read
 * rrd_mcpair:    passed to rf_MCPairWakeupFunc when the DAG completes
 * regionBuffer:  destination buffer handed to rf_MakeSimpleDAG
 * raidPtr:       array descriptor
 * rrd_dag_h:     out: the DAG that was created and dispatched
 * rrd_alloclist: out: alloc list created here; the DAG and the trace record
 *                are allocated against it
 * rrd_pda:       out: physical disk address covering the whole region log
 *
 * NON-BLOCKING
 */
static void ReadRegionLog(
  RF_RegionId_t         regionID,
  RF_MCPair_t          *rrd_mcpair,
  caddr_t               regionBuffer,
  RF_Raid_t            *raidPtr,
  RF_DagHeader_t      **rrd_dag_h,
  RF_AllocListElem_t  **rrd_alloclist,
  RF_PhysDiskAddr_t   **rrd_pda)
{
  RF_AccTraceEntry_t *tracerec;
  RF_DagNode_t *rrd_rdNode;

  /* create DAG to read region log from disk */
  rf_MakeAllocList(*rrd_alloclist);
  *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, rf_DiskReadFunc, rf_DiskReadUndoFunc,
                                "Rrl", *rrd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);

  /* create and initialize PDA for the core log */
  *rrd_pda = rf_AllocPDAList(1);
  rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), &((*rrd_pda)->col), &((*rrd_pda)->startSector));
  (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;

  if ((*rrd_pda)->next) {
    (*rrd_pda)->next = NULL;
    printf("set rrd_pda->next to NULL\n");
  }

  /*
   * initialize DAG parameters
   *
   * The trace record is allocated on the alloc list instead of on this
   * function's stack: the DAG is still in flight after we return, so it
   * must not be left pointing at a local.  The record is released together
   * with *rrd_alloclist.
   */
  RF_MallocAndAdd(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *), *rrd_alloclist);
  bzero((char *)tracerec, sizeof(RF_AccTraceEntry_t));
  (*rrd_dag_h)->tracerec = tracerec;
  rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
  rrd_rdNode->params[0].p = *rrd_pda;
  /* params[1] is not set here; regionBuffer was given to rf_MakeSimpleDAG */
  rrd_rdNode->params[2].v = 0;
  rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);

  /* launch region log read dag */
  rf_DispatchDAG(*rrd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
                 (void *) rrd_mcpair);
}
/*
 * WriteCoreLog -- initiate the write of a core log to a region log disk.
 * Once the DAG has been dispatched, return to the calling routine.
 *
 * log:           core log to write; supplies the region, the buffer and the
 *                offset within the region's disk log
 * fwr_mcpair:    passed to rf_MCPairWakeupFunc when the DAG completes
 * raidPtr:       array descriptor
 * fwr_dag_h:     out: the DAG that was created and dispatched
 * fwr_alloclist: out: alloc list created here; the DAG and the trace record
 *                are allocated against it
 * fwr_pda:       out: physical disk address of the log's slot on disk
 *
 * NON-BLOCKING
 */
static void WriteCoreLog(
  RF_ParityLog_t       *log,
  RF_MCPair_t          *fwr_mcpair,
  RF_Raid_t            *raidPtr,
  RF_DagHeader_t      **fwr_dag_h,
  RF_AllocListElem_t  **fwr_alloclist,
  RF_PhysDiskAddr_t   **fwr_pda)
{
  RF_RegionId_t regionID = log->regionID;
  RF_AccTraceEntry_t *tracerec;
  RF_SectorNum_t regionOffset;
  RF_DagNode_t *fwr_wrNode;

  /* create DAG to write a core log to a region log disk */
  rf_MakeAllocList(*fwr_alloclist);
  *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
                                "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);

  /* create and initialize PDA for the region log */
  *fwr_pda = rf_AllocPDAList(1);
  regionOffset = log->diskOffset;
  rf_MapLogParityLogging(raidPtr, regionID, regionOffset, &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
  (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;

  /*
   * initialize DAG parameters
   *
   * The trace record is allocated on the alloc list instead of on this
   * function's stack: the DAG is still in flight after we return, so it
   * must not be left pointing at a local.  The record is released together
   * with *fwr_alloclist.
   */
  RF_MallocAndAdd(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *), *fwr_alloclist);
  bzero((char *)tracerec, sizeof(RF_AccTraceEntry_t));
  (*fwr_dag_h)->tracerec = tracerec;
  fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
  fwr_wrNode->params[0].p = *fwr_pda;
  /* params[1] is not set here; log->bufPtr was given to rf_MakeSimpleDAG */
  fwr_wrNode->params[2].v = 0;
  fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);

  /* launch the dag to write the core log to disk */
  rf_DispatchDAG(*fwr_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
                 (void *) fwr_mcpair);
}
/*
 * ReadRegionParity -- initiate the read of a region's parity from disk.
 * Once the DAG has been dispatched, return to the calling routine.
 *
 * regionID:      region whose parity is read
 * prd_mcpair:    passed to rf_MCPairWakeupFunc when the DAG completes
 * parityBuffer:  destination buffer, installed as the read node's params[1]
 * raidPtr:       array descriptor
 * prd_dag_h:     out: the DAG that was created and dispatched
 * prd_alloclist: out: alloc list created here; the DAG and the trace record
 *                are allocated against it
 * prd_pda:       out: physical disk address filled in by rf_MapRegionParity
 *
 * NON-BLOCKING
 */
static void ReadRegionParity(
  RF_RegionId_t         regionID,
  RF_MCPair_t          *prd_mcpair,
  caddr_t               parityBuffer,
  RF_Raid_t            *raidPtr,
  RF_DagHeader_t      **prd_dag_h,
  RF_AllocListElem_t  **prd_alloclist,
  RF_PhysDiskAddr_t   **prd_pda)
{
  RF_AccTraceEntry_t *tracerec;
  RF_DagNode_t *prd_rdNode;

  /* create DAG to read region parity from disk */
  rf_MakeAllocList(*prd_alloclist);
  *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, rf_DiskReadUndoFunc,
                                "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);

  /* create and initialize PDA for region parity */
  *prd_pda = rf_AllocPDAList(1);
  rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), &((*prd_pda)->col), &((*prd_pda)->startSector), &((*prd_pda)->numSector));
  if (rf_parityLogDebug)
    printf("[reading %d sectors of parity from region %d]\n",
           (int)(*prd_pda)->numSector, regionID);
  if ((*prd_pda)->next) {
    (*prd_pda)->next = NULL;
    printf("set prd_pda->next to NULL\n");
  }

  /*
   * initialize DAG parameters
   *
   * The trace record is allocated on the alloc list instead of on this
   * function's stack: the DAG is still in flight after we return, so it
   * must not be left pointing at a local.  The record is released together
   * with *prd_alloclist.
   */
  RF_MallocAndAdd(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *), *prd_alloclist);
  bzero((char *)tracerec, sizeof(RF_AccTraceEntry_t));
  (*prd_dag_h)->tracerec = tracerec;
  prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
  prd_rdNode->params[0].p = *prd_pda;
  /* the DAG was built with a NULL buffer, so install the real one here */
  prd_rdNode->params[1].p = parityBuffer;
  prd_rdNode->params[2].v = 0;
  prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
  if (rf_validateDAGDebug)
    rf_ValidateDAG(*prd_dag_h);

  /* launch region parity read dag */
  rf_DispatchDAG(*prd_dag_h, (void (*)(void *)) rf_MCPairWakeupFunc,
                 (void *) prd_mcpair);
}
/*
 * WriteRegionParity -- initiate a write of one region's parity to disk.
 *
 * Builds a single-node write DAG ("Wrp") whose buffer is parityBuffer
 * (handed to rf_MakeSimpleDAG; params[1] is not set here, so presumably
 * rf_MakeSimpleDAG installs it -- confirm against rf_MakeSimpleDAG), a
 * one-element PDA describing the region's parity, and dispatches the DAG
 * with rf_MCPairWakeupFunc/pwr_mcpair as the completion callback.
 *
 * NON-BLOCKING: returns as soon as the DAG has been dispatched.
 *
 * Outputs (owned by the caller, who must release them once the DAG is done):
 *   *pwr_dag_h      the DAG header
 *   *pwr_alloclist  the allocation list used for the DAG; it also holds the
 *                   access-trace record attached to the DAG header
 *   *pwr_pda        the physical disk address of the region parity
 */
static void WriteRegionParity(
  RF_RegionId_t         regionID,
  RF_MCPair_t          *pwr_mcpair,
  caddr_t               parityBuffer,
  RF_Raid_t            *raidPtr,
  RF_DagHeader_t      **pwr_dag_h,
  RF_AllocListElem_t  **pwr_alloclist,
  RF_PhysDiskAddr_t   **pwr_pda)
{
  RF_AccTraceEntry_t *tracerec;
  RF_DagNode_t *pwr_wrNode;

  /* create DAG to write region parity to disk */
  rf_MakeAllocList(*pwr_alloclist);
  *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
                                "Wrp", *pwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);

  /* create and initialize PDA for region parity */
  *pwr_pda = rf_AllocPDAList(1);
  rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), &((*pwr_pda)->col), &((*pwr_pda)->startSector), &((*pwr_pda)->numSector));

  /*
   * Initialize DAG parameters.  The trace record must outlive this
   * function because the DAG runs after we return, so it cannot live on
   * our stack.  It is zero-filled and hung on the caller's alloc list, so
   * it is released together with the rest of the DAG's memory.
   */
  RF_CallocAndAdd(tracerec, 1, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *), (*pwr_alloclist));
  RF_ASSERT(tracerec != NULL);
  (*pwr_dag_h)->tracerec = tracerec;
  pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
  pwr_wrNode->params[0].p = *pwr_pda;
  /* params[1] (the buffer) is intentionally not assigned here */
  pwr_wrNode->params[2].v = 0;
  pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);

  /* launch the dag to write region parity to disk */
  rf_DispatchDAG(*pwr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
                 (void *) pwr_mcpair);
}
/*
 * FlushLogsToDisk -- write a linked list of core logs to the log disk.
 *
 * Each log carries the disk location it belongs at.  The logs were filled
 * in FIFO order and are written one at a time, in list order, so that the
 * order is preserved.  When every log has been written the whole list is
 * handed back via rf_ReleaseParityLogs.
 *
 * Recommended optimizations:
 *   1) allow multiple flushes to occur simultaneously
 *   2) coalesce contiguous flush operations
 *
 * BLOCKING
 */
static void FlushLogsToDisk(
  RF_Raid_t       *raidPtr,
  RF_ParityLog_t  *logList)
{
  RF_MCPair_t *fwr_mcpair;
  RF_DagHeader_t *fwr_dag_h;
  RF_AllocListElem_t *fwr_alloclist;
  RF_PhysDiskAddr_t *fwr_pda;
  RF_ParityLog_t *curLog;
  RF_RegionId_t regionID;

  /* one mcpair is reused for every write; its mutex stays held throughout */
  fwr_mcpair = rf_AllocMCPair();
  RF_LOCK_MUTEX(fwr_mcpair->mutex);

  RF_ASSERT(logList);
  for (curLog = logList; curLog != NULL; curLog = curLog->next) {
    regionID = curLog->regionID;

    /* build and dispatch the DAG which writes this core log */
    if (rf_parityLogDebug)
      printf("[initiating write of core log for region %d]\n", regionID);
    fwr_mcpair->flag = RF_FALSE;
    WriteCoreLog(curLog, fwr_mcpair, raidPtr, &fwr_dag_h, &fwr_alloclist, &fwr_pda);

    /* sleep until the DAG's completion callback raises the flag */
#ifndef SIMULATE
    while (!fwr_mcpair->flag)
      RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
#endif /* !SIMULATE */

    if (fwr_dag_h->status != rf_enable) {
      RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID);
      RF_ASSERT(0);
    }

    /* give back everything WriteCoreLog handed us */
    rf_FreePhysDiskAddr(fwr_pda);
    rf_FreeDAG(fwr_dag_h);
    rf_FreeAllocList(fwr_alloclist);
  }

  RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
  rf_FreeMCPair(fwr_mcpair);
  rf_ReleaseParityLogs(raidPtr, logList);
}
/*
 * ReintegrateRegion -- reintegrate a region (regionID).
 *
 *   1. acquire region and parity buffers
 *   2. read log from disk (only if the region's diskCount is non-zero)
 *   3. read parity from disk
 *   4. apply log to parity
 *   5. apply core log to parity
 *   6. write new parity to disk
 *
 * NOTE(review): in this version the calls for steps 4 and 5
 * (ApplyRegionToParity / ApplyLogsToParity) are commented out, so coreLog
 * is not consulted and the parity buffer is written back as it was read.
 *
 * BLOCKING: waits for each DAG it launches.  Any I/O failure is reported
 * and then trips RF_ASSERT(0).
 */
static void ReintegrateRegion(
  RF_Raid_t       *raidPtr,
  RF_RegionId_t    regionID,
  RF_ParityLog_t  *coreLog)
{
  RF_MCPair_t *rrd_mcpair=NULL, *prd_mcpair, *pwr_mcpair;
  RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
  RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
  RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
  caddr_t parityBuffer, regionBuffer=NULL;

  if (rf_parityLogDebug)
    printf("[reintegrating region %d]\n", regionID);

  /* initiate read of region parity */
  if (rf_parityLogDebug)
    printf("[initiating read of parity for region %d]\n", regionID);
  parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool);
  prd_mcpair = rf_AllocMCPair();
  RF_LOCK_MUTEX(prd_mcpair->mutex);
  prd_mcpair->flag = RF_FALSE;
  ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, &prd_dag_h, &prd_alloclist, &prd_pda);

  /* if region log nonempty, initiate read */
  if (raidPtr->regionInfo[regionID].diskCount > 0)
    {
      if (rf_parityLogDebug)
        printf("[initiating read of disk log for region %d]\n", regionID);
      regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool);
      rrd_mcpair = rf_AllocMCPair();
      RF_LOCK_MUTEX(rrd_mcpair->mutex);
      rrd_mcpair->flag = RF_FALSE;
      ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, &rrd_dag_h, &rrd_alloclist, &rrd_pda);
    }

  /* wait on read of region parity to complete */
#ifndef SIMULATE
  while (!prd_mcpair->flag) {
    RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
  }
#endif /* !SIMULATE */
  RF_UNLOCK_MUTEX(prd_mcpair->mutex);
  if (prd_dag_h->status != rf_enable)
    {
      RF_ERRORMSG("Unable to read parity from disk\n");
      /* add code to fail the parity disk */
      RF_ASSERT(0);
    }

  /* apply core log to parity */
  /* if (coreLog)
     ApplyLogsToParity(coreLog, parityBuffer); */

  /*
   * Finish the region-log read only if one was actually started above.
   * rrd_mcpair is non-NULL exactly in that case; testing it (rather than
   * re-reading diskCount) keeps this decision consistent with the earlier
   * one even if diskCount were to change in between.
   */
  if (rrd_mcpair != NULL)
    {
      /* wait on read of region log to complete */
#ifndef SIMULATE
      while (!rrd_mcpair->flag)
        RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
#endif /* !SIMULATE */
      RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
      if (rrd_dag_h->status != rf_enable)
        {
          RF_ERRORMSG("Unable to read region log from disk\n");
          /* add code to fail the log disk */
          RF_ASSERT(0);
        }
      /* apply region log to parity */
      /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
      /* release resources associated with region log */
      rf_FreePhysDiskAddr(rrd_pda);
      rf_FreeDAG(rrd_dag_h);
      rf_FreeAllocList(rrd_alloclist);
      rf_FreeMCPair(rrd_mcpair);
      ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
    }

  /* write reintegrated parity to disk */
  if (rf_parityLogDebug)
    printf("[initiating write of parity for region %d]\n", regionID);
  pwr_mcpair = rf_AllocMCPair();
  RF_LOCK_MUTEX(pwr_mcpair->mutex);
  pwr_mcpair->flag = RF_FALSE;
  WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, &pwr_dag_h, &pwr_alloclist, &pwr_pda);
#ifndef SIMULATE
  while (!pwr_mcpair->flag)
    RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
#endif /* !SIMULATE */
  RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
  if (pwr_dag_h->status != rf_enable)
    {
      RF_ERRORMSG("Unable to write parity to disk\n");
      /* add code to fail the parity disk */
      RF_ASSERT(0);
    }

  /* release resources associated with read of old parity */
  rf_FreePhysDiskAddr(prd_pda);
  rf_FreeDAG(prd_dag_h);
  rf_FreeAllocList(prd_alloclist);
  rf_FreeMCPair(prd_mcpair);

  /* release resources associated with write of new parity */
  ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
  rf_FreePhysDiskAddr(pwr_pda);
  rf_FreeDAG(pwr_dag_h);
  rf_FreeAllocList(pwr_alloclist);
  rf_FreeMCPair(pwr_mcpair);

  if (rf_parityLogDebug)
    printf("[finished reintegrating region %d]\n", regionID);
}
/*
 * ReintegrateLogs -- reintegrate the region of every log on logList.
 *
 * For each log, in list order:
 *   - reintegrate the log's region and mark the log empty;
 *   - pull off the reintBlock queue every log-data item that was waiting
 *     on this region;
 *   - if there were any, hand them (and the now-empty log) to
 *     rf_ParityLogAppend; otherwise reset the region's diskCount and clear
 *     its reintInProgress flag;
 *   - if the log is still ours afterwards, keep it for release.
 * Logs collected this way are returned through rf_ReleaseParityLogs.
 */
static void ReintegrateLogs(
  RF_Raid_t       *raidPtr,
  RF_ParityLog_t  *logList)
{
  RF_ParityLog_t *curLog, *unusedLogs = NULL;
  RF_ParityLogData_t *blockedData, **tailp;
  RF_RegionId_t regionID;

  RF_ASSERT(logList);
  while (logList != NULL) {
    /* detach the head of the list */
    curLog = logList;
    logList = curLog->next;
    curLog->next = NULL;

    regionID = curLog->regionID;
    ReintegrateRegion(raidPtr, regionID, curLog);
    curLog->numRecords = 0;

    /* remove all items which are blocked on reintegration of this region,
       chaining them together in the order they are dequeued */
    RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    tailp = &blockedData;
    while ((*tailp = rf_SearchAndDequeueParityLogData(raidPtr, regionID, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE)) != NULL)
      tailp = &(*tailp)->next;
    RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

    if (blockedData == NULL) {
      /* Nothing was waiting: enable flushing for this region.  Holding
         both locks provides a synchronization barrier with
         DumpParityLogToDisk. */
      RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
      RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
      RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
      raidPtr->regionInfo[regionID].diskCount = 0;
      raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
      RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
      RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
    } else {
      /* process blocked log data and clear reintInProgress flag for this region */
      rf_ParityLogAppend(blockedData, RF_TRUE, &curLog, RF_TRUE);
    }

    /* if log wasn't used, attach it to the list of logs to be returned */
    if (curLog != NULL) {
      curLog->next = unusedLogs;
      unusedLogs = curLog;
    }
  }

  if (unusedLogs != NULL)
    rf_ReleaseParityLogs(raidPtr, unusedLogs);
}
/*
 * rf_ShutdownLogging -- shut down parity logging.
 *
 * When rf_forceParityLogReint is set, walks every parity region and
 *   1) disables logging in the region and detaches its core log,
 *   2) reintegrates the region if it has log data on disk or a core log,
 *   3) releases the detached core log.
 * Since parity log maps are volatile, all regions must be reintegrated.
 *
 * Always returns 0.
 */
int rf_ShutdownLogging(RF_Raid_t *raidPtr)
{
  RF_ParityLog_t *coreLog;
  RF_SectorCount_t onDisk;
  RF_RegionId_t region;

  if (rf_parityLogDebug)
    printf("[shutting down parity logging]\n");

  if (rf_forceParityLogReint) {
    for (region = 0; region < rf_numParityRegions; region++) {
      /* snapshot and reset the region's state under its mutex */
      RF_LOCK_MUTEX(raidPtr->regionInfo[region].mutex);
      raidPtr->regionInfo[region].loggingEnabled = RF_FALSE;
      coreLog = raidPtr->regionInfo[region].coreLog;
      raidPtr->regionInfo[region].coreLog = NULL;
      onDisk = raidPtr->regionInfo[region].diskCount;
      RF_UNLOCK_MUTEX(raidPtr->regionInfo[region].mutex);

      /* only regions that hold log data need reintegrating */
      if (onDisk > 0 || coreLog != NULL)
        ReintegrateRegion(raidPtr, region, coreLog);

      if (coreLog != NULL)
        rf_ReleaseParityLogs(raidPtr, coreLog);
    }
  }

  if (rf_parityLogDebug) {
    printf("[parity logging disabled]\n");
    printf("[should be done!]\n");
  }

  return(0);
}
/*
 * rf_ParityLoggingDiskManager -- main program for the parity logging
 * disk thread.
 *
 * Waits for work to appear in either the flush or reintegration queue and
 * is responsible for flushing core logs to the log disk as well as
 * reintegrating parity regions.  When RF_PLOG_TERMINATE is found set in
 * threadState and no work is pending, it calls rf_ShutdownLogging, sets
 * RF_PLOG_SHUTDOWN and signals the queue's condition.
 *
 * BLOCKING
 */
int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
{
  RF_ParityLog_t *flushQueue, *reintQueue;

  rf_assign_threadid(); /* don't remove this line */

  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

  /*
   * Inform our creator that we're running. Don't bother doing the
   * mutex lock/unlock dance- we locked above, and we'll unlock
   * below with nothing to do, yet.
   */
  raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
  RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);

  /* take whatever is queued right now (mutex held) */
  flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
  raidPtr->parityLogDiskQueue.flushQueue = NULL;
  reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
  raidPtr->parityLogDiskQueue.reintQueue = NULL;

  for (;;) {
    /*
     * Work loop.
     * First, flush all logs in the flush queue, freeing buffers
     * Second, reintegrate all regions which are reported as full.
     * Third, append queued log data until blocked.
     *
     * Note: Incoming appends (ParityLogAppend) can block on either
     *   1. empty buffer pool
     *   2. region under reintegration
     * To preserve a global FIFO ordering of appends, buffers are not
     * released to the world until those appends blocked on buffers are
     * removed from the append queue.  Similarly, regions which are
     * reintegrated are not opened for general use until the append
     * queue has been emptied.
     */
    while (flushQueue != NULL || reintQueue != NULL) {
      /* the actual I/O is done without the queue mutex */
      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

      /* empty flushQueue, using free'd log buffers to process bufTail */
      if (flushQueue)
        FlushLogsToDisk(raidPtr, flushQueue);

      /* empty reintQueue, flushing from reintTail as we go */
      if (reintQueue)
        ReintegrateLogs(raidPtr, reintQueue);

      /* pick up anything that arrived meanwhile */
      RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
      flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
      raidPtr->parityLogDiskQueue.flushQueue = NULL;
      reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
      raidPtr->parityLogDiskQueue.reintQueue = NULL;
    }

    /* no work is needed at this point */
    if (raidPtr->parityLogDiskQueue.threadState&RF_PLOG_TERMINATE) {
      /* shutdown parity logging
         1. disable parity logging in all regions
         2. reintegrate all regions
      */
      RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
      rf_ShutdownLogging(raidPtr);
      break;    /* thread disabled, no work needed */
    }

    /* thread enabled, no work needed, so sleep */
    if (rf_parityLogDebug)
      printf("[parity logging disk manager sleeping]\n");
    RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, raidPtr->parityLogDiskQueue.mutex);
    if (rf_parityLogDebug)
      printf("[parity logging disk manager just woke up]\n");
    flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
    raidPtr->parityLogDiskQueue.flushQueue = NULL;
    reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
    raidPtr->parityLogDiskQueue.reintQueue = NULL;
  }

  /*
   * Announce that we're done.
   */
  RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
  RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);

#if defined(__NetBSD__) && defined(_KERNEL)
  /*
   * In the NetBSD kernel, the thread must exit; returning would
   * cause the proc trampoline to attempt to return to userspace.
   */
  kthread_exit(0);      /* does not return */
#else
  return(0);
#endif
}
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */

View File

@ -0,0 +1,62 @@
/* $NetBSD: rf_paritylogDiskMgr.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* header file for parity log disk mgr code
*
* :
* Log: rf_paritylogDiskMgr.h,v
* Revision 1.5 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.4 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.3 1995/12/06 20:56:39 wvcii
* added prototypes
*
* Revision 1.2 1995/11/30 16:06:21 wvcii
* added copyright info
*
* Revision 1.1 1995/09/06 19:25:29 wvcii
* Initial revision
*
*
*/
#ifndef _RF__RF_PARITYLOGDISKMGR_H_
#define _RF__RF_PARITYLOGDISKMGR_H_
#include "rf_types.h"
/*
 * Shut down parity logging for the array.  When rf_forceParityLogReint is
 * set, logging is disabled in every region and each region that holds log
 * data is reintegrated.  Always returns 0.
 */
int rf_ShutdownLogging(RF_Raid_t *raidPtr);
/*
 * Main routine of the parity logging disk thread: services the flush and
 * reintegration queues until RF_PLOG_TERMINATE is set, then shuts logging
 * down and marks the thread RF_PLOG_SHUTDOWN.  In the NetBSD kernel build
 * it ends in kthread_exit() and does not return; otherwise it returns 0.
 */
int rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr);
#endif /* !_RF__RF_PARITYLOGDISKMGR_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
/* $NetBSD: rf_paritylogging.h,v 1.1 1998/11/13 04:20:32 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* header file for Parity Logging */
/*
* :
* Log: rf_paritylogging.h,v
* Revision 1.22 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.21 1996/07/13 00:00:59 jimz
* sanitized generalized reconstruction architecture
* cleaned up head sep, rbuf problems
*
* Revision 1.20 1996/06/10 11:55:47 jimz
* Straightened out some per-array/not-per-array distinctions, fixed
* a couple bugs related to confusion. Added shutdown lists. Removed
* layout shutdown function (now subsumed by shutdown lists).
*
* Revision 1.19 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.18 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.17 1996/06/03 23:28:26 jimz
* more bugfixes
* check in tree to sync for IPDS runs with current bugfixes
* there still may be a problem with threads in the script test
* getting I/Os stuck- not trivially reproducible (runs ~50 times
* in a row without getting stuck)
*
* Revision 1.16 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.15 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.14 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.13 1996/05/24 01:59:45 jimz
* another checkpoint in code cleanup for release
* time to sync kernel tree
*
* Revision 1.12 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.11 1995/12/06 20:56:25 wvcii
* added prototypes
*
* Revision 1.10 1995/11/30 16:06:58 wvcii
* added copyright info
*
* Revision 1.9 1995/11/17 19:53:08 wvcii
* fixed bug in MapParityRegion prototype
*
* Revision 1.8 1995/11/17 19:09:24 wvcii
* added prototyping to MapParity
*
* Revision 1.7 1995/11/07 15:28:17 wvcii
* changed ParityLoggingDagSelect prototype
* function no longer generates numHdrSucc, numTermAnt
*
* Revision 1.6 1995/07/07 00:16:50 wvcii
* this version free from deadlock, fails parity verification
*
* Revision 1.5 1995/06/23 13:39:44 robby
* updeated to prototypes in rf_layout.h
*
*/
#ifndef _RF__RF_PARITYLOGGING_H_
#define _RF__RF_PARITYLOGGING_H_
/*
 * NOTE(review): the definitions of these functions are not part of this
 * header; the remarks below are limited to what the prototypes and visible
 * callers show.  This header uses RF_* types without including any header
 * itself, so it presumably relies on the includer to provide them --
 * TODO confirm.
 */
/* configuration entry point for the parity logging architecture */
int rf_ConfigureParityLogging(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
RF_Config_t *cfgPtr);
int rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t *raidPtr);
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t *raidPtr);
/* mapping routines: results are returned through the pointer arguments */
RF_RegionId_t rf_MapRegionIDParityLogging(RF_Raid_t *raidPtr,
RF_SectorNum_t address);
void rf_MapSectorParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
int remap);
void rf_MapParityParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector,
int remap);
void rf_MapLogParityLogging(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
RF_SectorNum_t regionOffset, RF_RowCol_t *row, RF_RowCol_t *col,
RF_SectorNum_t *startSector);
/*
 * Used by the parity log disk manager to fill in the row, column, start
 * sector and sector count of the PDA it uses to read or write the parity
 * of region regionID.
 */
void rf_MapRegionParity(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *startSector,
RF_SectorCount_t *numSector);
void rf_IdentifyStripeParityLogging(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
RF_RowCol_t **diskids, RF_RowCol_t *outRow);
void rf_MapSIDToPSIDParityLogging(RF_RaidLayout_t *layoutPtr,
RF_StripeNum_t stripeID, RF_StripeNum_t *psID,
RF_ReconUnitNum_t *which_ru);
/* DAG selection: hands back a DAG creation function through createFunc */
void rf_ParityLoggingDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc);
#endif /* !_RF__RF_PARITYLOGGING_H_ */

View File

@ -0,0 +1,751 @@
/* $NetBSD: rf_parityloggingdags.c,v 1.1 1998/11/13 04:20:32 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Log: rf_parityloggingdags.c,v
* Revision 1.27 1996/07/28 20:31:39 jimz
* i386netbsd port
* true/false fixup
*
* Revision 1.26 1996/07/27 23:36:08 jimz
* Solaris port of simulator
*
* Revision 1.25 1996/07/22 19:52:16 jimz
* switched node params to RF_DagParam_t, a union of
* a 64-bit int and a void *, for better portability
* attempted hpux port, but failed partway through for
* lack of a single C compiler capable of compiling all
* source files
*
* Revision 1.24 1996/06/11 13:47:21 jimz
* fix up for in-kernel compilation
*
* Revision 1.23 1996/06/07 22:26:27 jimz
* type-ify which_ru (RF_ReconUnitNum_t)
*
* Revision 1.22 1996/06/07 21:33:04 jimz
* begin using consistent types for sector numbers,
* stripe numbers, row+col numbers, recon unit numbers
*
* Revision 1.21 1996/06/02 17:31:48 jimz
* Moved a lot of global stuff into array structure, where it belongs.
* Fixed up paritylogging, pss modules in this manner. Some general
* code cleanup. Removed lots of dead code, some dead files.
*
* Revision 1.20 1996/05/31 22:26:54 jimz
* fix a lot of mapping problems, memory allocation problems
* found some weird lock issues, fixed 'em
* more code cleanup
*
* Revision 1.19 1996/05/30 11:29:41 jimz
* Numerous bug fixes. Stripe lock release code disagreed with the taking code
* about when stripes should be locked (I made it consistent: no parity, no lock)
* There was a lot of extra serialization of I/Os which I've removed- a lot of
* it was to calculate values for the cache code, which is no longer with us.
* More types, function, macro cleanup. Added code to properly quiesce the array
* on shutdown. Made a lot of stuff array-specific which was (bogusly) general
* before. Fixed memory allocation, freeing bugs.
*
* Revision 1.18 1996/05/27 18:56:37 jimz
* more code cleanup
* better typing
* compiles in all 3 environments
*
* Revision 1.17 1996/05/24 22:17:04 jimz
* continue code + namespace cleanup
* typed a bunch of flags
*
* Revision 1.16 1996/05/24 04:28:55 jimz
* release cleanup ckpt
*
* Revision 1.15 1996/05/23 21:46:35 jimz
* checkpoint in code cleanup (release prep)
* lots of types, function names have been fixed
*
* Revision 1.14 1996/05/23 00:33:23 jimz
* code cleanup: move all debug decls to rf_options.c, all extern
* debug decls to rf_options.h, all debug vars preceded by rf_
*
* Revision 1.13 1996/05/18 19:51:34 jimz
* major code cleanup- fix syntax, make some types consistent,
* add prototypes, clean out dead code, et cetera
*
* Revision 1.12 1996/05/08 21:01:24 jimz
* fixed up enum type names that were conflicting with other
* enums and function names (ie, "panic")
* future naming trends will be towards RF_ and rf_ for
* everything raidframe-related
*
* Revision 1.11 1996/05/03 19:42:02 wvcii
* added includes for dag library
*
* Revision 1.10 1995/12/12 18:10:06 jimz
* MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
* fix 80-column brain damage in comments
*
* Revision 1.9 1995/12/06 20:55:24 wvcii
* added prototyping
* fixed bug in dag header numSuccedents count for both small and large dags
*
* Revision 1.8 1995/11/30 16:08:01 wvcii
* added copyright info
*
* Revision 1.7 1995/11/07 15:29:05 wvcii
* reorganized code, adding comments and asserts
* dag creation routines now generate term node
* encoded commit point, barrier, and antecedence types into dags
*
* Revision 1.6 1995/09/07 15:52:06 jimz
* noop compile when INCLUDE_PARITYLOGGING not defined
*
* Revision 1.5 1995/06/15 13:51:53 robby
* updated some wrong prototypes (after prototyping rf_dagutils.h)
*
* Revision 1.4 1995/06/09 13:15:05 wvcii
* code is now nonblocking
*
* Revision 1.3 95/05/31 13:09:14 wvcii
* code debug
*
* Revision 1.2 1995/05/21 15:34:14 wvcii
* code debug
*
* Revision 1.1 95/05/16 14:36:53 wvcii
* Initial revision
*
*
*/
#include "rf_archs.h"
#if RF_INCLUDE_PARITYLOGGING > 0
/*
DAGs specific to parity logging are created here
*/
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_debugMem.h"
#include "rf_paritylog.h"
#include "rf_memchunk.h"
#include "rf_general.h"
#include "rf_parityloggingdags.h"
/******************************************************************************
*
* creates a DAG to perform a large-write operation:
*
* / Rod \ / Wnd \
* H -- NIL- Rod - NIL - Wnd ------ NIL - T
* \ Rod / \ Xor - Lpo /
*
* The writes are not done until the reads complete because if they were done in
* parallel, a failure on one of the reads could leave the parity in an inconsistent
* state, so that the retry with a new DAG would produce erroneous parity.
*
* Note: this DAG has the nasty property that none of the buffers allocated for reading
* old data can be freed until the XOR node fires. Need to fix this.
*
* The last two arguments are the number of faults tolerated, and function for the
* redundancy calculation. The undo for the redundancy calc is assumed to be null
*
*****************************************************************************/
/*
 * Build the parity-logging large-write DAG in dag_h for the access
 * described by asmap.
 *
 *   raidPtr    array the access is directed at
 *   asmap      stripe map of the access; one Wnd node is created per
 *              stripe unit accessed, and asmap->parityInfo must describe
 *              the entire parity unit (a single PDA)
 *   dag_h      DAG header to fill in
 *   bp, flags  not used by this routine
 *   allocList  every node and buffer allocated here is added to this list
 *   nfaults    number of faults tolerated; must be 1
 *   redFunc    function executed by the redundancy (xor) node
 *
 * Nodes: block (Nil) -> Rod reads of the unaccessed part of the stripe ->
 * sync (Nil) -> { Wnd writes, Xor -> Lpo } -> unblock (Nil) -> Trm.
 */
void rf_CommonCreateParityLoggingLargeWriteDAG(
  RF_Raid_t              *raidPtr,
  RF_AccessStripeMap_t   *asmap,
  RF_DagHeader_t         *dag_h,
  void                   *bp,
  RF_RaidAccessFlags_t    flags,
  RF_AllocListElem_t     *allocList,
  int                     nfaults,
  int                   (*redFunc)(RF_DagNode_t *))
{
  RF_DagNode_t *nodes, *wndNodes, *rodNodes=NULL, *syncNode, *xorNode, *lpoNode, *blockNode, *unblockNode, *termNode;
  int nWndNodes, nRodNodes, i;
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  RF_AccessStripeMapHeader_t *new_asm_h[2];
  int nodeNum, asmNum;
  RF_ReconUnitNum_t which_ru;
  char *sosBuffer, *eosBuffer;
  RF_PhysDiskAddr_t *pda;
  RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);

  if (rf_dagDebug)
    printf("[Creating parity-logging large-write DAG]\n");
  RF_ASSERT(nfaults == 1); /* this arch only single fault tolerant */
  dag_h->creator = "ParityLoggingLargeWriteDAG";

  /* alloc the Wnd nodes, the xor node, and the Lpo node
     (plus the block, sync, unblock and terminator nodes: 6 extra in all) */
  nWndNodes = asmap->numStripeUnitsAccessed;
  RF_CallocAndAdd(nodes, nWndNodes + 6, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
  i = 0;
  wndNodes    = &nodes[i]; i += nWndNodes;
  xorNode     = &nodes[i]; i += 1;
  lpoNode     = &nodes[i]; i += 1;
  blockNode   = &nodes[i]; i += 1;
  syncNode    = &nodes[i]; i += 1;
  unblockNode = &nodes[i]; i += 1;
  termNode    = &nodes[i]; i += 1;

  dag_h->numCommitNodes = nWndNodes + 1;
  dag_h->numCommits = 0;
  dag_h->numSuccedents = 1;

  /* map the part of the stripe this access does not touch; it yields up to
     two stripe maps and the number of Rod nodes needed to read them */
  rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
  if (nRodNodes > 0)
    RF_CallocAndAdd(rodNodes, nRodNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);

  /* begin node initialization */
  rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
  rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList);
  rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList);
  rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

  /* initialize the Rod nodes */
  for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
    if (new_asm_h[asmNum]) {
      pda = new_asm_h[asmNum]->stripeMap->physInfo;
      while (pda) {
        rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc,rf_DiskReadUndoFunc,rf_GenericWakeupFunc,1,1,4,0, dag_h, "Rod", allocList);
        rodNodes[nodeNum].params[0].p = pda;
        rodNodes[nodeNum].params[1].p = pda->bufPtr;
        rodNodes[nodeNum].params[2].v = parityStripeID;
        rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
        nodeNum++;
        pda=pda->next;
      }
    }
  }
  RF_ASSERT(nodeNum == nRodNodes);

  /* initialize the wnd nodes */
  pda = asmap->physInfo;
  for (i=0; i < nWndNodes; i++) {
    rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
    RF_ASSERT(pda != NULL);
    wndNodes[i].params[0].p = pda;
    wndNodes[i].params[1].p = pda->bufPtr;
    wndNodes[i].params[2].v = parityStripeID;
    wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    pda = pda->next;
  }

  /* initialize the redundancy node: its parameters are the (pda, buffer)
     pair of every Wnd and Rod node, followed by the raidPtr */
  rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2*(nWndNodes+nRodNodes)+1, 1, dag_h, "Xr ", allocList);
  xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
  for (i=0; i < nWndNodes; i++) {
    xorNode->params[2*i+0] = wndNodes[i].params[0];         /* pda */
    xorNode->params[2*i+1] = wndNodes[i].params[1];         /* buf ptr */
  }
  for (i=0; i < nRodNodes; i++) {
    xorNode->params[2*(nWndNodes+i)+0] = rodNodes[i].params[0];  /* pda */
    xorNode->params[2*(nWndNodes+i)+1] = rodNodes[i].params[1];  /* buf ptr */
  }
  xorNode->params[2*(nWndNodes+nRodNodes)].p = raidPtr;  /* xor node needs to get at RAID information */

  /* look for an Rod node that reads a complete SU.  If none, alloc a buffer to receive the parity info.
   * Note that we can't use a new data buffer because it will not have gotten written when the xor occurs.
   */
  for (i = 0; i < nRodNodes; i++)
    if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
      break;
  if (i == nRodNodes) {
    RF_CallocAndAdd(xorNode->results[0], 1, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
  }
  else {
    xorNode->results[0] = rodNodes[i].params[1].p;
  }

  /* initialize the Lpo node */
  rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList);
  lpoNode->params[0].p = asmap->parityInfo;
  lpoNode->params[1].p = xorNode->results[0];
  RF_ASSERT(asmap->parityInfo->next == NULL); /* parityInfo must describe entire parity unit */

  /* connect nodes to form graph */

  /* connect dag header to block node */
  RF_ASSERT(dag_h->numSuccedents == 1);
  RF_ASSERT(blockNode->numAntecedents == 0);
  dag_h->succedents[0] = blockNode;

  /* connect the block node to the Rod nodes */
  RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1);
  for (i = 0; i < nRodNodes; i++) {
    RF_ASSERT(rodNodes[i].numAntecedents == 1);
    blockNode->succedents[i] = &rodNodes[i];
    rodNodes[i].antecedents[0] = blockNode;
    rodNodes[i].antType[0] = rf_control;
  }

  /* connect the block node to the sync node */
  /* necessary if nRodNodes == 0 */
  RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1);
  blockNode->succedents[nRodNodes] = syncNode;
  syncNode->antecedents[0] = blockNode;
  syncNode->antType[0] = rf_control;

  /* connect the Rod nodes to the syncNode */
  for (i = 0; i < nRodNodes; i++) {
    rodNodes[i].succedents[0] = syncNode;
    syncNode->antecedents[1 + i] = &rodNodes[i];
    syncNode->antType[1 + i] = rf_control;
  }

  /* connect the sync node to the xor node */
  RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1);
  RF_ASSERT(xorNode->numAntecedents == 1);
  syncNode->succedents[0] = xorNode;
  xorNode->antecedents[0] = syncNode;
  xorNode->antType[0] = rf_trueData; /* carry forward from sync */

  /* connect the sync node to the Wnd nodes */
  for (i = 0; i < nWndNodes; i++) {
    /* check the node being wired, not just the first one */
    RF_ASSERT(wndNodes[i].numAntecedents == 1);
    syncNode->succedents[1 + i] = &wndNodes[i];
    wndNodes[i].antecedents[0] = syncNode;
    wndNodes[i].antType[0] = rf_control;
  }

  /* connect the xor node to the Lpo node */
  RF_ASSERT(xorNode->numSuccedents == 1);
  RF_ASSERT(lpoNode->numAntecedents == 1);
  xorNode->succedents[0] = lpoNode;
  lpoNode->antecedents[0]= xorNode;
  lpoNode->antType[0] = rf_trueData;

  /* connect the Wnd nodes to the unblock node */
  RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1);
  for (i = 0; i < nWndNodes; i++) {
    /* check the node being wired, not just the first one */
    RF_ASSERT(wndNodes[i].numSuccedents == 1);
    wndNodes[i].succedents[0] = unblockNode;
    unblockNode->antecedents[i] = &wndNodes[i];
    unblockNode->antType[i] = rf_control;
  }

  /* connect the Lpo node to the unblock node */
  RF_ASSERT(lpoNode->numSuccedents == 1);
  lpoNode->succedents[0] = unblockNode;
  unblockNode->antecedents[nWndNodes] = lpoNode;
  unblockNode->antType[nWndNodes] = rf_control;

  /* connect unblock node to terminator */
  RF_ASSERT(unblockNode->numSuccedents == 1);
  RF_ASSERT(termNode->numAntecedents == 1);
  RF_ASSERT(termNode->numSuccedents == 0);
  unblockNode->succedents[0] = termNode;
  termNode->antecedents[0] = unblockNode;
  termNode->antType[0] = rf_control;
}
/******************************************************************************
*
* creates a DAG to perform a small-write operation (either raid 5 or pq), which is as follows:
*
* Header
* |
* Block
* / | ... \ \
* / | \ \
* Rod Rod Rod Rop
* | \ /| \ / | \/ |
* | | | /\ |
* Wnd Wnd Wnd X
* | \ / |
* | \ / |
* \ \ / Lpo
* \ \ / /
* +-> Unblock <-+
* |
* T
*
*
* R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
* When the access spans a stripe unit boundary and is less than one SU in size, there will
* be two Rop -- X -- Wnp branches. I call this the "double-XOR" case.
* The second output from each Rod node goes to the X node. In the double-XOR
* case, there are exactly 2 Rod nodes, and each sends one output to one X node.
* There is one Rod -- Wnd -- T branch for each stripe unit being updated.
*
* The block and unblock nodes are unused. See comment above CreateFaultFreeReadDAG.
*
* Note: this DAG ignores all the optimizations related to making the RMWs atomic.
* it also has the nasty property that none of the buffers allocated for reading
* old data & parity can be freed until the XOR node fires. Need to fix this.
*
* A null qfuncs indicates single fault tolerant
*****************************************************************************/
/*
 * Build the parity-logging small-write DAG for one stripe access.
 *
 *   raidPtr   - array descriptor; its Layout is consulted and the pointer is
 *               handed to the xor node(s) as their last parameter
 *   asmap     - access stripe map: physInfo lists the data units being
 *               written, parityInfo the one or two parity ranges touched
 *   dag_h     - DAG header to fill in (creator, commit counters, and a single
 *               successor: the block node)
 *   bp, flags - not used by this routine
 *   allocList - allocation list passed to every node/buffer allocation below
 *   pfuncs    - supplies the simple/regular parity functions (and their
 *               names) used for the xor node(s)
 *   qfuncs    - must be NULL; a non-NULL value makes nfaults 2 and trips the
 *               RF_ASSERT(nfaults == 1) below
 */
void rf_CommonCreateParityLoggingSmallWriteDAG(
  RF_Raid_t             *raidPtr,
  RF_AccessStripeMap_t  *asmap,
  RF_DagHeader_t        *dag_h,
  void                  *bp,
  RF_RaidAccessFlags_t   flags,
  RF_AllocListElem_t    *allocList,
  RF_RedFuncs_t         *pfuncs,
  RF_RedFuncs_t         *qfuncs)
{
  RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes;
  RF_DagNode_t *readDataNodes, *readParityNodes;
  RF_DagNode_t *writeDataNodes, *lpuNodes;
  RF_DagNode_t *unlockDataNodes=NULL, *termNode;
  RF_PhysDiskAddr_t *pda = asmap->physInfo;
  int numDataNodes = asmap->numStripeUnitsAccessed;
  int numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
  int i, j, nNodes, totalNumNodes;
  RF_ReconUnitNum_t which_ru;
  int (*func)(RF_DagNode_t *node), (*undoFunc)(RF_DagNode_t *node);
  char *name;
  RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
  long nfaults = qfuncs ? 2 : 1;
  int lu_flag = (rf_enableAtomicRMW) ? 1 : 0;          /* lock/unlock flag */

  if (rf_dagDebug) printf("[Creating parity-logging small-write DAG]\n");
  RF_ASSERT(numDataNodes > 0);
  RF_ASSERT(nfaults == 1);
  dag_h->creator = "ParityLoggingSmallWriteDAG";

  /* DAG creation occurs in four steps:
     1. count the number of nodes in the DAG
     2. create the nodes
     3. initialize the nodes
     4. connect the nodes
   */

  /* Step 1. compute number of nodes in the graph */

  /* number of nodes:
      a read and write for each data unit           (2 * numDataNodes)
      a redundancy computation node for each parity node (numParityNodes)
      a read and Lpu for each parity unit           (2 * numParityNodes)
      a block and unblock node (2)
      a terminator node
      if atomic RMW
        an unlock node for each data unit
  */
  totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3;
  if (lu_flag)
    totalNumNodes += numDataNodes;

  /* nNodes is the fan-out of the block node and of every Rod/Rop node, and
   * the fan-in of every Wnd/xor node and of the unblock node */
  nNodes = numDataNodes + numParityNodes;

  /* the Wnd and xor nodes are the ones initialized as commit nodes below */
  dag_h->numCommitNodes = numDataNodes + numParityNodes;
  dag_h->numCommits = 0;
  dag_h->numSuccedents = 1;

  /* Step 2. create the nodes */
  RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
  /* carve the single allocation into per-role sub-arrays */
  i = 0;
  blockNode         = &nodes[i]; i += 1;
  unblockNode       = &nodes[i]; i += 1;
  readDataNodes     = &nodes[i]; i += numDataNodes;
  readParityNodes   = &nodes[i]; i += numParityNodes;
  writeDataNodes    = &nodes[i]; i += numDataNodes;
  lpuNodes          = &nodes[i]; i += numParityNodes;
  xorNodes          = &nodes[i]; i += numParityNodes;
  termNode          = &nodes[i]; i += 1;
  if (lu_flag) {
    unlockDataNodes = &nodes[i]; i += numDataNodes;
  }
  RF_ASSERT(i == totalNumNodes);

  /* Step 3. initialize the nodes */
  /* initialize block node (Nil) */
  rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);

  /* initialize unblock node (Nil) */
  rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList);

  /* initialize terminator node (Trm) */
  rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

  /* initialize nodes which read old data (Rod) */
  for (i = 0; i < numDataNodes; i++) {
    rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList);
    RF_ASSERT(pda != NULL);
    readDataNodes[i].params[0].p = pda;  /* physical disk addr desc */
    readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList);  /* buffer to hold old data */
    readDataNodes[i].params[2].v = parityStripeID;
    readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, lu_flag, 0, which_ru);
    pda=pda->next;
    readDataNodes[i].propList[0] = NULL;
    readDataNodes[i].propList[1] = NULL;
  }

  /* initialize nodes which read old parity (Rop) */
  pda = asmap->parityInfo;
  for (i = 0; i < numParityNodes; i++) {
    RF_ASSERT(pda != NULL);
    rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList);
    readParityNodes[i].params[0].p = pda;
    readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList);    /* buffer to hold old parity */
    readParityNodes[i].params[2].v = parityStripeID;
    readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    readParityNodes[i].propList[0] = NULL;
    pda=pda->next;
  }

  /* initialize nodes which write new data (Wnd) */
  pda = asmap->physInfo;
  for (i=0; i < numDataNodes; i++) {
    RF_ASSERT(pda != NULL);
    rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList);
    writeDataNodes[i].params[0].p = pda;                    /* physical disk addr desc */
    writeDataNodes[i].params[1].p = pda->bufPtr;   /* buffer holding new data to be written */
    writeDataNodes[i].params[2].v = parityStripeID;
    writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);

    if (lu_flag) {
      /* initialize node to unlock the disk queue */
      rf_InitNode(&unlockDataNodes[i], rf_wait, RF_FALSE, rf_DiskUnlockFunc, rf_DiskUnlockUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Und", allocList);
      unlockDataNodes[i].params[0].p = pda;  /* physical disk addr desc */
      unlockDataNodes[i].params[1].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, lu_flag, which_ru);
    }
    pda = pda->next;
  }

  /* initialize nodes which compute new parity */
  /* we use the simple XOR func in the double-XOR case, and when we're accessing only a portion of one stripe unit.
   * the distinction between the two is that the regular XOR func assumes that the targbuf is a full SU in size,
   * and examines the pda associated with the buffer to decide where within the buffer to XOR the data, whereas
   * the simple XOR func just XORs the data into the start of the buffer.
   */
  undoFunc = rf_NullNodeUndoFunc;
  if ((numParityNodes==2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
    func = pfuncs->simple;
    name = pfuncs->SimpleName;
  } else {
    func = pfuncs->regular;
    name = pfuncs->RegularName;
  }

  /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop} nodes, and raidPtr  */
  if (numParityNodes==2) {        /* double-xor case */
    for (i=0; i < numParityNodes; i++) {
      rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList);  /* no wakeup func for xor */
      xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
      xorNodes[i].params[0]   = readDataNodes[i].params[0];
      xorNodes[i].params[1]   = readDataNodes[i].params[1];
      xorNodes[i].params[2]   = readParityNodes[i].params[0];
      xorNodes[i].params[3]   = readParityNodes[i].params[1];
      xorNodes[i].params[4]   = writeDataNodes[i].params[0];
      xorNodes[i].params[5]   = writeDataNodes[i].params[1];
      xorNodes[i].params[6].p = raidPtr;
      xorNodes[i].results[0]  = readParityNodes[i].params[1].p;   /* use old parity buf as target buf */
    }
  }
  else {
    /* there is only one xor node in this case; its parameter list is
     * {pda,buf} for every Rod, then for the single Rop, then for every Wnd,
     * followed by raidPtr */
    rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
    xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
    for (i=0; i < numDataNodes; i++) {
      /* set up params related to Rod nodes */
      xorNodes[0].params[2*i+0] = readDataNodes[i].params[0];         /* pda */
      xorNodes[0].params[2*i+1] = readDataNodes[i].params[1];         /* buffer pointer */
    }
    /* set up params related to the Rop node.  Taken from readParityNodes[0]
     * explicitly rather than by indexing one past the end of readDataNodes,
     * so this no longer depends on the two sub-arrays being adjacent in
     * nodes[]. */
    xorNodes[0].params[2*numDataNodes+0] = readParityNodes[0].params[0];  /* pda */
    xorNodes[0].params[2*numDataNodes+1] = readParityNodes[0].params[1];  /* buffer pointer */
    for (i=0; i < numDataNodes; i++) {
      /* set up params related to Wnd nodes */
      xorNodes[0].params[2*(numDataNodes+1+i)+0] = writeDataNodes[i].params[0]; /* pda */
      xorNodes[0].params[2*(numDataNodes+1+i)+1] = writeDataNodes[i].params[1]; /* buffer pointer */
    }
    xorNodes[0].params[2*(numDataNodes+numDataNodes+1)].p = raidPtr;  /* xor node needs to get at RAID information */
    xorNodes[0].results[0] = readParityNodes[0].params[1].p;          /* use old parity buf as target buf */
  }

  /* initialize the log node(s) (Lpu) */
  pda = asmap->parityInfo;
  for (i = 0;  i < numParityNodes; i++) {
    RF_ASSERT(pda);
    rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList);
    lpuNodes[i].params[0].p = pda;                    /* PhysDiskAddr of parity */
    lpuNodes[i].params[1].p = xorNodes[i].results[0]; /* buffer pointer to parity */
    pda = pda->next;
  }


  /* Step 4. connect the nodes */

  /* connect header to block node */
  RF_ASSERT(dag_h->numSuccedents == 1);
  RF_ASSERT(blockNode->numAntecedents == 0);
  dag_h->succedents[0] = blockNode;

  /* connect block node to read old data nodes */
  RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes));
  for (i = 0; i < numDataNodes; i++) {
    blockNode->succedents[i] = &readDataNodes[i];
    RF_ASSERT(readDataNodes[i].numAntecedents == 1);
    readDataNodes[i].antecedents[0]= blockNode;
    readDataNodes[i].antType[0] = rf_control;
  }

  /* connect block node to read old parity nodes */
  for (i = 0; i < numParityNodes; i++) {
    blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
    RF_ASSERT(readParityNodes[i].numAntecedents == 1);
    readParityNodes[i].antecedents[0] = blockNode;
    readParityNodes[i].antType[0] = rf_control;
  }

  /* connect read old data nodes to write new data nodes: every Rod precedes
   * every Wnd; only the Rod/Wnd pair with the same index is an anti-data
   * dependence, the rest are control edges */
  for (i = 0; i < numDataNodes; i++) {
    RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes);
    for (j = 0; j < numDataNodes; j++) {
      RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes);
      readDataNodes[i].succedents[j] = &writeDataNodes[j];
      writeDataNodes[j].antecedents[i] = &readDataNodes[i];
      if (i == j)
        writeDataNodes[j].antType[i] = rf_antiData;
      else
        writeDataNodes[j].antType[i] = rf_control;
    }
  }

  /* connect read old data nodes to xor nodes */
  for (i = 0; i < numDataNodes; i++)
    for (j = 0; j < numParityNodes; j++){
      RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
      readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
      xorNodes[j].antecedents[i] = &readDataNodes[i];
      xorNodes[j].antType[i] = rf_trueData;
    }

  /* connect read old parity nodes to write new data nodes */
  for (i = 0; i < numParityNodes; i++) {
    RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes);
    for (j = 0; j < numDataNodes; j++) {
      readParityNodes[i].succedents[j] = &writeDataNodes[j];
      writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
      writeDataNodes[j].antType[numDataNodes + i] = rf_control;
    }
  }

  /* connect read old parity nodes to xor nodes */
  for (i = 0; i < numParityNodes; i++)
    for (j = 0; j < numParityNodes; j++) {
      readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
      xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
      xorNodes[j].antType[numDataNodes + i] = rf_trueData;
    }

  /* connect xor nodes to parity-log update (Lpu) nodes */
  for (i = 0; i < numParityNodes; i++) {
    RF_ASSERT(xorNodes[i].numSuccedents == 1);
    RF_ASSERT(lpuNodes[i].numAntecedents == 1);
    xorNodes[i].succedents[0] = &lpuNodes[i];
    lpuNodes[i].antecedents[0] = &xorNodes[i];
    lpuNodes[i].antType[0] = rf_trueData;
  }

  for (i = 0; i < numDataNodes; i++) {
    if (lu_flag) {
      /* connect write new data nodes to unlock nodes */
      RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
      RF_ASSERT(unlockDataNodes[i].numAntecedents == 1);
      writeDataNodes[i].succedents[0] = &unlockDataNodes[i];
      unlockDataNodes[i].antecedents[0] = &writeDataNodes[i];
      unlockDataNodes[i].antType[0] = rf_control;

      /* connect unlock nodes to unblock node */
      RF_ASSERT(unlockDataNodes[i].numSuccedents == 1);
      RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
      unlockDataNodes[i].succedents[0] = unblockNode;
      unblockNode->antecedents[i] = &unlockDataNodes[i];
      unblockNode->antType[i] = rf_control;
    }
    else {
      /* connect write new data nodes to unblock node */
      RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
      RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
      writeDataNodes[i].succedents[0] = unblockNode;
      unblockNode->antecedents[i] = &writeDataNodes[i];
      unblockNode->antType[i] = rf_control;
    }
  }

  /* connect Lpu nodes to unblock node */
  for (i = 0; i < numParityNodes; i++) {
    RF_ASSERT(lpuNodes[i].numSuccedents == 1);
    lpuNodes[i].succedents[0] = unblockNode;
    unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i];
    unblockNode->antType[numDataNodes + i] = rf_control;
  }

  /* connect unblock node to terminator */
  RF_ASSERT(unblockNode->numSuccedents == 1);
  RF_ASSERT(termNode->numAntecedents == 1);
  RF_ASSERT(termNode->numSuccedents == 0);
  unblockNode->succedents[0] = termNode;
  termNode->antecedents[0] = unblockNode;
  termNode->antType[0] = rf_control;
}
/*
 * Entry point for building a parity-logging small-write DAG.
 *
 * Labels the DAG header and hands everything to
 * rf_CommonCreateParityLoggingSmallWriteDAG().  The caller's pfuncs and
 * qfuncs are not consulted: the common routine is always invoked with
 * &rf_xorFuncs as the parity functions and NULL for the Q functions.
 */
void
rf_CreateParityLoggingSmallWriteDAG(RF_Raid_t *raidPtr,
    RF_AccessStripeMap_t *asmap, RF_DagHeader_t *dag_h, void *bp,
    RF_RaidAccessFlags_t flags, RF_AllocListElem_t *allocList,
    RF_RedFuncs_t *pfuncs, RF_RedFuncs_t *qfuncs)
{
	dag_h->creator = "ParityLoggingSmallWriteDAG";

	rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h,
	    bp, flags, allocList,
	    &rf_xorFuncs,	/* parity functions */
	    NULL);		/* no Q functions */
}
/*
 * Entry point for building a parity-logging large-write DAG.
 *
 * Labels the DAG header and delegates to
 * rf_CommonCreateParityLoggingLargeWriteDAG().  The nfaults and redFunc
 * arguments supplied by the caller are ignored: the common routine is always
 * called with nfaults = 1 and rf_RegularXorFunc as the redundancy function.
 */
void rf_CreateParityLoggingLargeWriteDAG(
  RF_Raid_t             *raidPtr,
  RF_AccessStripeMap_t  *asmap,
  RF_DagHeader_t        *dag_h,
  void                  *bp,
  RF_RaidAccessFlags_t   flags,
  RF_AllocListElem_t    *allocList,
  int                    nfaults,
  int                  (*redFunc)(RF_DagNode_t *))
{
  /* this is the large-write creator; the label previously said "SmallWrite"
   * (copied from the small-write entry point) */
  dag_h->creator = "ParityLoggingLargeWriteDAG";
  rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc);
}
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */

Some files were not shown because too many files have changed in this diff Show More