Use a db(3) hash database instead of shared memory. Don't fork() to service
requests in the database when restarting.
1997-10-21 20:38:03 +00:00 · 1997-10-21 20:38:03 +00:00 · 1175f55b11
parent 890b485672
commit 1175f55b11
3 changed files with 495 additions and 314 deletions
--- a/usr.sbin/rpc.statd/stat_proc.c
+++ b/usr.sbin/rpc.statd/stat_proc.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $	*/
+/*	$NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $	*/

 /*
 * Copyright (c) 1995
@@ -35,7 +35,7 @@

 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $");
+__RCSID("$NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $");
 #endif

 #include <errno.h>
@@ -44,14 +44,13 @@ __RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $");
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
+#include <signal.h>
 #include <unistd.h>

 #include <rpc/rpc.h>

 #include "statd.h"

-static	int	do_unmon __P((HostInfo *, my_id *));
-
 /* sm_stat_1 --------------------------------------------------------------- */
 /*
 * Purpose:	RPC call to enquire if a host can be monitored
@@ -65,6 +64,7 @@ sm_stat_1_svc(arg, req)
 {
 	static sm_stat_res res;

+	NO_ALARM;
 	if (debug)
 		syslog(LOG_DEBUG, "stat called for host %s", arg->mon_name);

@@ -76,7 +76,8 @@ sm_stat_1_svc(arg, req)
 		res.res_stat = stat_fail;
 	}

-	res.state = status_info->ourState;
+	res.state = status_info.ourState;
+	ALARM;
 	return (&res);
 }

@@ -94,9 +95,10 @@ sm_mon_1_svc(arg, req)
 	struct svc_req *req;
 {
 	static sm_stat_res res;
-	HostInfo *hp;
+	HostInfo *hp, h;
 	MonList *lp;

+	NO_ALARM;
 	if (debug) {
 		syslog(LOG_DEBUG, "monitor request for host %s",
 		    arg->mon_id.mon_name);
@@ -105,35 +107,40 @@ sm_mon_1_svc(arg, req)
 		    arg->mon_id.my_id.my_vers, arg->mon_id.my_id.my_proc);
 	}
 	res.res_stat = stat_fail;	/* Assume fail until set otherwise */
-	res.state = status_info->ourState;
+	res.state = status_info.ourState;

 	/*
 	 * Find existing host entry, or create one if not found.  If
 	 * find_host() fails, it will have logged the error already.
 	 */
-	if (!gethostbyname(arg->mon_id.mon_name))
+	if (!gethostbyname(arg->mon_id.mon_name)) {
 		syslog(LOG_ERR, "Invalid hostname to sm_mon: %s",
 		    arg->mon_id.mon_name);
-	else if ((hp = find_host(arg->mon_id.mon_name, TRUE)) != NULL) {
-		lp = (MonList *)malloc(sizeof(MonList));
-		if (!lp)
-			syslog(LOG_ERR, "Out of memory");
-		else {
-			strncpy(lp->notifyHost, arg->mon_id.my_id.my_name,
-			    SM_MAXSTRLEN);
-			lp->notifyProg = arg->mon_id.my_id.my_prog;
-			lp->notifyVers = arg->mon_id.my_id.my_vers;
-			lp->notifyProc = arg->mon_id.my_id.my_proc;
-			memcpy(lp->notifyData, arg->priv,
-			    sizeof(lp->notifyData));
-
-			lp->next = hp->monList;
-			hp->monList = lp;
-			sync_file();
-
-			res.res_stat = stat_succ;	/* Report success */
-		}
+		return &res;
 	}
+
+	if ((hp = find_host(arg->mon_id.mon_name, &h)) == NULL)
+		memset(hp = &h, 0, sizeof(h));
+
+	lp = (MonList *)malloc(sizeof(MonList));
+	if (!lp)
+		syslog(LOG_ERR, "Out of memory");
+	else {
+		strncpy(lp->notifyHost, arg->mon_id.my_id.my_name,
+		    SM_MAXSTRLEN);
+		lp->notifyProg = arg->mon_id.my_id.my_prog;
+		lp->notifyVers = arg->mon_id.my_id.my_vers;
+		lp->notifyProc = arg->mon_id.my_id.my_proc;
+		memcpy(lp->notifyData, arg->priv,
+		    sizeof(lp->notifyData));
+
+		lp->next = hp->monList;
+		hp->monList = lp;
+		change_host(arg->mon_id.mon_name, hp);
+		sync_file();
+		res.res_stat = stat_succ;	/* Report success */
+	}
+	ALARM;
 	return (&res);
 }

@@ -145,11 +152,13 @@ sm_mon_1_svc(arg, req)
 *		In the unlikely event of more than one identical monitor
 *		request, all are removed.
 */
-static int 
-do_unmon(hp, idp)
+int 
+do_unmon(name, hp, ptr)
+	char *name;
 	HostInfo *hp;
-	my_id *idp;
+	void *ptr;
 {
+	my_id *idp = ptr;
 	MonList *lp, *next;
 	MonList *last = NULL;
 	int result = FALSE;
@@ -190,8 +199,9 @@ sm_unmon_1_svc(arg, req)
 	struct svc_req *req;
 {
 	static sm_stat res;
-	HostInfo *hp;
+	HostInfo *hp, h;

+	NO_ALARM;
 	if (debug) {
 		syslog(LOG_DEBUG, "un-monitor request for host %s",
 		    arg->mon_name);
@@ -199,9 +209,11 @@ sm_unmon_1_svc(arg, req)
 		    arg->my_id.my_name, arg->my_id.my_prog,
 		    arg->my_id.my_vers, arg->my_id.my_proc);
 	}
-	if ((hp = find_host(arg->mon_name, FALSE)) != NULL) {
-		if (do_unmon(hp, &arg->my_id))
+	if ((hp = find_host(arg->mon_name, &h)) != NULL) {
+		if (do_unmon(arg->mon_name, hp, &arg->my_id)) {
+			change_host(arg->mon_name, hp);
 			sync_file();
+		}
 		else
 			syslog(LOG_ERR,
 			    "unmon request from %s, no matching monitor",
@@ -210,7 +222,8 @@ sm_unmon_1_svc(arg, req)
 		syslog(LOG_ERR, "unmon request from %s for unknown host %s",
 		    arg->my_id.my_name, arg->mon_name);

-	res.state = status_info->ourState;
+	res.state = status_info.ourState;
+	ALARM;

 	return (&res);
 }
@@ -228,21 +241,19 @@ sm_unmon_all_1_svc(arg, req)
 	struct svc_req *req;
 {
 	static sm_stat res;
-	HostInfo *hp;
-	int     i;

+	NO_ALARM;
 	if (debug) {
 		syslog(LOG_DEBUG,
 		    "unmon_all for host: %s prog: %d ver: %d proc: %d",
 		    arg->my_name, arg->my_prog, arg->my_vers, arg->my_proc);
 	}

-	for (i = status_info->noOfHosts, hp = status_info->hosts; i; i--, hp++)
-		do_unmon(hp, arg);
-
+	unmon_hosts();
 	sync_file();

-	res.state = status_info->ourState;
+	res.state = status_info.ourState;
+	ALARM;

 	return (&res);
 }
@@ -266,30 +277,14 @@ sm_simu_crash_1_svc(v, req)
 	struct svc_req *req;
 {
 	static char dummy;
-	int     work_to_do;
-	HostInfo *hp;
-	int     i;

-	work_to_do = 0;
+	NO_ALARM;
 	if (debug)
 		syslog(LOG_DEBUG, "simu_crash called!!");

-	/*
-	 * Simulate crash by setting notify-required flag on all monitored
-	 * hosts, and incrementing our status number.  notify_hosts() is
-	 * then called to fork a process to do the notifications.
-	 */
-	for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
-	    i--, hp++) {
-		if (hp->monList) {
-			work_to_do = TRUE;
-			hp->notifyReqd = TRUE;
-		}
-	}
-	status_info->ourState += 2;	/* always even numbers if not crashed */
-
-	if (work_to_do)
-		notify_hosts();
+	reset_database();
+	ALARM;
+	notify_handler(0);

 	return (&dummy);
 }
@@ -319,14 +314,14 @@ sm_notify_1_svc(arg, req)
 	static char dummy;
 	status tx_arg;		/* arg sent to callback procedure */
 	MonList *lp;
-	HostInfo *hp;
+	HostInfo *hp, h;
 	pid_t pid;

 	if (debug)
 		syslog(LOG_DEBUG, "notify from host %s, new state %d",
 		    arg->mon_name, arg->state);

-	hp = find_host(arg->mon_name, FALSE);
+	hp = find_host(arg->mon_name, &h);
 	if (!hp) {
 		/* Never heard of this host - why is it notifying us? */
 		syslog(LOG_ERR, "Unsolicited notification from host %s",
--- a/usr.sbin/rpc.statd/statd.c
+++ b/usr.sbin/rpc.statd/statd.c
@@ -1,6 +1,7 @@
-/*	$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $	*/
+/*	$NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $	*/

 /*
+ * Copyright (c) 1997 Christos Zoulas. All rights reserved.
 * Copyright (c) 1995
 *	A.R. Gordon (andrew.gordon@net-tel.co.uk).  All rights reserved.
 *
@@ -15,6 +16,7 @@
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the FreeBSD project
+ *	This product includes software developed by Christos Zoulas.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
@@ -35,7 +37,7 @@

 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
+__RCSID("$NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $");
 #endif


@@ -43,11 +45,8 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x	*/
 /* The actual program logic is in the file procs.c			*/

-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/wait.h>
-
 #include <err.h>
+#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -56,21 +55,39 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
 #include <string.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <db.h>

 #include <rpc/rpc.h>

 #include "statd.h"

+struct sigaction sa;
 int     	debug = 0;		/* Controls syslog() for debug msgs */
 int     	_rpcsvcdirty = 0;	/* XXX ??? */
-FileLayout	*status_info;		/* Pointer to mmap()ed status file */
-static int	status_fd;		/* File descriptor for the open file */
-static off_t	status_file_len;	/* Current on-disc length of file */
+static DB	*db;			/* Database file */

-	int	main __P((int, char **));
-static	void	handle_sigchld __P((int));
-static	int	notify_one_host __P((char *));
-extern	void	sm_prog_1 __P((struct svc_req *, SVCXPRT *));
+Header		 status_info;
+
+static char undefdata[] = "\0\1\2\3\4\5\6\7";
+static DBT undefkey = {
+	undefdata,
+	sizeof(undefdata)
+};
+extern char *__progname;
+
+
+/* statd.c */
+static int walk_one __P((int (*fun )__P ((DBT *, DBT *, void *)), DBT *, DBT *, void *));
+static int walk_db __P((int (*fun )__P ((DBT *, DBT *, void *)), void *));
+static int reset_host __P((DBT *, DBT *, void *));
+static int check_work __P((DBT *, DBT *, void *));
+static int unmon_host __P((DBT *, DBT *, void *));
+static int notify_one __P((DBT *, DBT *, void *));
+static void init_file __P((char *));
+static int notify_one_host __P((char *));
+static void die __P((int)) __attribute__((__noreturn__));
+
+int main __P((int, char **));

 int
 main(argc, argv)
@@ -78,7 +95,6 @@ main(argc, argv)
 	char **argv;
 {
 	SVCXPRT *transp;
-	struct sigaction sa;
 	int ch;

 	while ((ch = getopt(argc, argv, "d")) != (-1)) {
@@ -88,7 +104,7 @@ main(argc, argv)
 			break;
 		default:
 		case '?':
-			errx(1, "usage: rpc.statd [-d]");
+			(void) fprintf(stderr, "Usage: %s [-d]", __progname);
 			/* NOTREACHED */
 		}
 	}
@@ -118,35 +134,40 @@ main(argc, argv)
 	 * Note that it is NOT sensible to run this program from inetd - the
 	 * protocol assumes that it will run immediately at boot time.
 	 */
-	daemon(0, 0);
+	if (!debug)
+		daemon(0, 0);
 	openlog("rpc.statd", 0, LOG_DAEMON);
 	if (debug)
 		syslog(LOG_INFO, "Starting - debug enabled");
 	else
 		syslog(LOG_INFO, "Starting");

-	/* Install signal handler to collect exit status of child processes */
-	sa.sa_handler = handle_sigchld;
+	sa.sa_handler = die;
+	sa.sa_flags = 0;
 	sigemptyset(&sa.sa_mask);
-	sigaddset(&sa.sa_mask, SIGCHLD);
+	(void)sigaction(SIGTERM, &sa, NULL);
+	(void)sigaction(SIGQUIT, &sa, NULL);
+	(void)sigaction(SIGHUP, &sa, NULL);
+	(void)sigaction(SIGINT, &sa, NULL);
+
+	sa.sa_handler = SIG_IGN;
 	sa.sa_flags = SA_RESTART;
-	sigaction(SIGCHLD, &sa, NULL);
+	sigemptyset(&sa.sa_mask);
+	sigaddset(&sa.sa_mask, SIGALRM);

 	/* Initialisation now complete - start operating */

-	/*
-	 * notify_hosts() forks a process (if necessary) to do the
-	 * SM_NOTIFY calls, which may be slow.
-	 */
-	notify_hosts();
+	/* Notify hosts that need it */
+	notify_handler(0);

-	svc_run();		/* Should never return */
-	exit(1);
+	while (1)
+		svc_run();		/* Should never return */
+	die(0);
 }

-/* handle_sigchld ---------------------------------------------------------- */
+/* notify_handler ---------------------------------------------------------- */
 /*
- * Purpose:	Catch SIGCHLD and collect process status
+ * Purpose:	Catch SIGALRM and send any pending SM_NOTIFY notifications
 * Returns:	Nothing.
 * Notes:	No special action required, other than to collect the
 *		process status and hence allow the child to die:
@@ -154,19 +175,30 @@ main(argc, argv)
 *		of SM_NOTIFY to other systems, so it is normal for the
 *		children to exit when they have done their work.
 */
-static void 
-handle_sigchld(sig)
+void 
+notify_handler(sig)
 	int sig;
 {
-	int     pid, status;
-	pid = wait4(-1, &status, WNOHANG, (struct rusage *) 0);
-	if (!pid)
-		syslog(LOG_ERR, "Phantom SIGCHLD??");
-	else if (status)
-		syslog(LOG_ERR, "Child %d failed with status %d", pid,
-		    WEXITSTATUS(status));
-	else if (debug)
-		syslog(LOG_DEBUG, "Child %d exited OK", pid);
+	time_t now;
+
+	NO_ALARM;
+	sa.sa_handler = SIG_IGN;
+	(void)sigaction(SIGALRM, &sa, NULL);
+
+	now = time(NULL);
+
+	(void) walk_db(notify_one, &now);
+
+	if (walk_db(check_work, &now) == 0) {
+		/*
+		 * No more work to be done.
+		 */
+		CLR_ALARM;
+		return;
+	}
+	sync_file();
+	ALARM;
+	alarm(5);
 }

 /* sync_file --------------------------------------------------------------- */
@@ -177,72 +209,296 @@ handle_sigchld(sig)
 void 
 sync_file()
 {
-	if (msync((caddr_t)status_info, 0, 0) < 0)
-		syslog(LOG_ERR, "msync() failed: %s", strerror(errno));
+	DBT data;
+
+	data.data = &status_info;
+	data.size = sizeof(status_info);
+	switch ((*db->put)(db, &undefkey, &data, 0)) {
+	case 0:
+		return;
+	case -1:
+		goto bad;
+	default:
+		abort();
+	}
+	if ((*db->sync)(db, 0) == -1) {
+bad:
+		syslog(LOG_ERR, "database corrupted %m");
+		die(1);
+	}
 }

+/* change_host -------------------------------------------------------------- */
+/*
+ * Purpose:	Update/Create an entry for host
+ * Returns:	Nothing
+ * Notes:
+ *
+ */
+void
+change_host(hostname, hp)
+	char *hostname;
+	HostInfo *hp;
+{
+	DBT key, data;
+	char *ptr;
+
+	for (ptr = hostname; *ptr; ptr++)
+		if (isupper((unsigned char) *ptr))
+			*ptr = tolower((unsigned char) *ptr);
+
+	key.data = hostname;
+	key.size = ptr - hostname + 1;
+	data.data = hp;
+	data.size = sizeof(*hp);
+
+	switch ((*db->put)(db, &key, &data, 0)) {
+	case -1:
+		syslog(LOG_ERR, "database corrupted %m");
+		die(1);
+	case 0:
+		return;
+	default:
+		abort();
+	}
+}
+
+
 /* find_host -------------------------------------------------------------- */
 /*
 * Purpose:	Find the entry in the status file for a given host
- * Returns:	Pointer to that entry in the mmap() region, or NULL.
- * Notes:	Also creates entries if requested.
- *		Failure to create also returns NULL.
+ * Returns:	hp filled with a copy of the entry, or NULL if not found or on error
+ * Notes:
+ *
 */
 HostInfo *
-find_host(hostname, create)
+find_host(hostname, hp)
 	char *hostname;
-	int create;
-{
 	HostInfo *hp;
-	HostInfo *spare_slot = NULL;
-	HostInfo *result = NULL;
-	int i;
+{
+	DBT key, data;
+	char *ptr;

-	for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts;
-	    i++, hp++) {
-		if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN)) {
-			result = hp;
+	for (ptr = hostname; *ptr; ptr++)
+		if (isupper((unsigned char) *ptr))
+			*ptr = tolower((unsigned char) *ptr);
+
+	key.data = hostname;
+	key.size = ptr - hostname + 1;
+	switch ((*db->get)(db, &key, &data, 0)) {
+	case 0:
+		if (data.size != sizeof(*hp))
+			goto bad;
+		return memcpy(hp, data.data, sizeof(*hp));
+	case 1:
+		return NULL;
+	case -1:
+		goto bad;
+	default:
+		abort();
+	}
+
+bad:
+	syslog(LOG_ERR, "Database corrupted %m");
+	return NULL;
+}
+
+/* walk_one ------------------------------------------------------------- */
+/*
+ * Purpose:	Call the given function if the element is valid
+ * Returns:	0 for the header record, else the callback's result; exits on bad data
+ * Notes:	
+ */
+static int
+walk_one(fun, key, data, ptr)
+	int (*fun) __P((DBT *, DBT *, void *));
+	DBT *key, *data;
+	void *ptr;
+{
+	if (key->size == undefkey.size &&
+	    memcmp(key->data, undefkey.data, key->size) == 0)
+		return 0;
+	if (data->size != sizeof(HostInfo)) {
+		syslog(LOG_ERR, "Bad data in database");
+		die(1);
+	}
+
+	return (*fun)(key, data, ptr);
+}
+
+/* walk_db -------------------------------------------------------------- */
+/*
+ * Purpose:	Iterate over all elements calling the given function
+ * Returns:	-1 if function failed, 0 on success
+ * Notes:	
+ */
+static int
+walk_db(fun, ptr)
+	int (*fun) __P((DBT *, DBT *, void *));
+	void *ptr;
+{
+	DBT key, data;
+
+	switch ((*db->seq)(db, &key, &data, R_FIRST)) {
+	case -1:
+		goto bad;
+	case 1:
+		/* We should have at least the magic entry at this point */
+		abort();
+	case 0:
+		if (walk_one(fun, &key, &data, ptr) == -1)
+			return -1;
+		break;
+	default:
+		abort();
+	}
+
+
+	for (;;)
+		switch ((*db->seq)(db, &key, &data, R_NEXT)) {
+		case -1:
+			goto bad;
+		case 1:
+			if (walk_one(fun, &key, &data, ptr) == -1)
+				return -1;
 			break;
+		case 0:
+			return 0;
+		default:
+			abort();
 		}
-		if (!spare_slot && !hp->monList && !hp->notifyReqd)
-			spare_slot = hp;
+bad:
+	syslog(LOG_ERR, "Corrupted database %m");
+	die(1);
+}
+
+/* reset_host ------------------------------------------------------------ */
+/*
+ * Purpose:	Clean up existing hosts in file.
+ * Returns:	Always success 0.
+ * Notes:	Clean-up of existing file - monitored hosts will have a
+ *		pointer to a list of clients, which refers to memory in
+ *		the previous incarnation of the program and so are
+ *		meaningless now.  These pointers are zeroed and the fact
+ *		that the host was previously monitored is recorded by
+ *		setting the notifyReqd flag, which will in due course
+ *		cause a SM_NOTIFY to be sent.
+ *		 
+ *		Note that if we crash twice in quick succession, some hosts
+ *		may already have notifyReqd set, where we didn't manage to
+ *		notify them before the second crash occurred.
+ */
+static int
+reset_host(key, data, ptr)
+	DBT *key, *data;
+	void *ptr;
+{
+	HostInfo *hi = data->data;
+
+	if (hi->monList) {
+		hi->notifyReqd = *(time_t *) data;
+		hi->attempts = 0;
+		hi->monList = NULL;
 	}
+	return 0;
+}

-	/* Return if entry found, or if not asked to create one. */
-	if (result || !create)
-		return (result);
+/* check_work ------------------------------------------------------------ */
+/*
+ * Purpose:	Check if there is work to be done.
+ * Returns:	0 if there is no work to be done, -1 if there is.
+ * Notes:	
+ */
+static int
+check_work(key, data, ptr)
+	DBT *key, *data;
+	void *ptr;
+{
+	HostInfo *hi = data->data;

-	/*
-	 * Now create an entry, using the spare slot if one was found or
-	 * adding to the end of the list otherwise, extending file if req'd
-	 */
-	if (!spare_slot) {
-		off_t desired_size;
-		spare_slot = &status_info->hosts[status_info->noOfHosts];
-		desired_size = ((char *)spare_slot - (char *)status_info) +
-		    sizeof(HostInfo);
+	return hi->notifyReqd ? -1 : 0;
+}

-		if (desired_size > status_file_len) {
-			/* Extend file by writing 1 byte of junk at the
-			 * desired end pos	 */
-			lseek(status_fd, desired_size - 1, SEEK_SET);
-			i = write(status_fd, &i, 1);
-			if (i < 1) {
-				syslog(LOG_ERR, "Unable to extend status file");
-				return (NULL);
-			}
-			status_file_len = desired_size;
+/* unmon_host ------------------------------------------------------------ */
+/*
+ * Purpose:	Unmonitor a host
+ * Returns:	0
+ * Notes:	
+ */
+static int
+unmon_host(key, data, ptr)
+	DBT *key, *data;
+	void *ptr;
+{
+	char *name = key->data;
+	HostInfo *hi = data->data;
+
+	if (do_unmon(name, hi, ptr))
+		change_host(name, hi);
+	return 0;
+}
+
+/* notify_one ------------------------------------------------------------ */
+/*
+ * Purpose:	Notify one host.
+ * Returns:	0 if notified, not yet due, or given up; -1 if the attempt failed and will be retried
+ * Notes:	
+ */
+static int
+notify_one(key, data, ptr)
+	DBT *key, *data;
+	void *ptr;
+{
+	time_t now = *(time_t *) ptr;
+	char *name = key->data;
+	HostInfo *hi = data->data;
+
+	if (hi->notifyReqd == 0 || hi->notifyReqd > now)
+		return 0;
+
+	if (notify_one_host(name)) {
+give_up:
+		hi->notifyReqd = 0;
+		hi->attempts = 0;
+		switch ((*db->put)(db, key, data, 0)) {
+		case -1:
+			syslog(LOG_ERR, "Error storing %s (%m)", name);
+		case 0:
+			return 0;
+
+		default:
+			abort();
 		}
-		status_info->noOfHosts++;
 	}
-	/*
-	 * Initialise the spare slot that has been found/created
-	 * Note that we do not msync(), since the caller is presumed to be
-	 * about to modify the entry further
-	 */
-	memset(spare_slot, 0, sizeof(HostInfo));
-	strncpy(spare_slot->hostname, hostname, SM_MAXSTRLEN);
-	return (spare_slot);
+	else {
+		/*
+		 * If one of the initial attempts fails, we wait
+		 * for a while and have another go.  This is necessary
+		 * because when we have crashed, (eg. a power outage)
+		 * it is quite possible that we won't be able to
+		 * contact all monitored hosts immediately on restart,
+		 * either because they crashed too and take longer
+		 * to come up (in which case the notification isn't
+		 * really required), or more importantly if some
+		 * router etc. needed to reach the monitored host
+		 * has not come back up yet.  In this case, we will
+		 * be a bit late in re-establishing locks (after the
+		 * grace period) but that is the best we can do.  We
+		 * try 10 times at 5 sec intervals, 10 more times at
+		 * 1 minute intervals, then 24 more times at hourly
+		 * intervals, finally giving up altogether if the
+		 * host hasn't come back to life after 24 hours.
+		 */
+		if (hi->attempts++ >= 44)
+			goto give_up;
+		else if (hi->attempts < 10)
+			hi->notifyReqd += 5;
+		else if (hi->attempts < 20)
+			hi->notifyReqd += 60;
+		else
+			hi->notifyReqd += 60 * 60;
+		return -1;
+	}
 }

 /* init_file -------------------------------------------------------------- */
@@ -257,89 +513,73 @@ find_host(hostname, create)
 *		all hosts that had a monitor list, and incrementing
 *		the state number to the next even value.
 */
-void 
+static void 
 init_file(filename)
 	char *filename;
 {
-	char buf[HEADER_LEN];
-	int new_file = FALSE;
-	int i;
+	DBT data;

-	/* try to open existing file - if not present, create one */
-	status_fd = open(filename, O_RDWR);
-	if ((status_fd < 0) && (errno == ENOENT)) {
-		status_fd = open(filename, O_RDWR | O_CREAT, 0644);
-		new_file = TRUE;
-	}
-	if (status_fd < 0) {
-		err(1, "unable to open status file %s", filename);
-		/* NOTREACHED */
+	db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 644, DB_HASH, 
+	    NULL);
+	if (db == NULL)
+		err(1, "Cannot open `%s'", filename);
+
+	switch ((*db->get)(db, &undefkey, &data, 0)) {
+	case 1:
+		/* New database */
+		(void)memset(&status_info, 0, sizeof(status_info));
+		sync_file();
+		return;
+
+	case -1:
+		err(1, "error accessing database (%m)");
+	case 0:
+		/* Existing database */
+		if (data.size != sizeof(status_info))
+			errx(1, "database corrupted %d != %d",
+			    data.size, sizeof(status_info));
+		break;
+	default:
+		abort();
 	}

-	/*
-	 * File now open.  mmap() it, with a generous size to allow for
-	 * later growth, where we will extend the file but not re-map it.
-	 */
-	status_info = (FileLayout *)mmap(NULL, 0x1000000,
-	    PROT_READ | PROT_WRITE, MAP_SHARED, status_fd, 0);
-
-	if (status_info == (FileLayout *)(-1)) {
-		perror("rpc.statd");
-		fprintf(stderr, "Unable to mmap() status file\n");
-	}
-	status_file_len = lseek(status_fd, 0L, SEEK_END);
-
-	/*
-	 * If the file was not newly created, validate the contents, and if
-	 * defective, re-create from scratch.
-	 */
-	if (!new_file) {
-		if ((status_file_len < HEADER_LEN) || (status_file_len <
-		    (HEADER_LEN + sizeof(HostInfo) * status_info->noOfHosts))) {
-			fprintf(stderr, "rpc.statd: status file is corrupt\n");
-			new_file = TRUE;
-		}
-	}
-	/* Initialisation of a new, empty file. */
-	if (new_file) {
-		memset(buf, 0, sizeof(buf));
-		lseek(status_fd, 0L, SEEK_SET);
-		write(status_fd, buf, HEADER_LEN);
-		status_file_len = HEADER_LEN;
-	} else {
-		/*
-		 * Clean-up of existing file - monitored hosts will have a
-		 * pointer to a list of clients, which refers to memory in
-		 * the previous incarnation of the program and so are
-		 * meaningless now.  These pointers are zeroed and the fact
-		 * that the host was previously monitored is recorded by
-		 * setting the notifyReqd flag, which will in due course
-		 * cause a SM_NOTIFY to be sent.
-		 *
-		 * Note that if we crash twice in quick succession, some hosts
-		 * may already have notifyReqd set, where we didn't manage to
-		 * notify them before the second crash occurred.
-		 */
-		for (i = 0; i < status_info->noOfHosts; i++) {
-			HostInfo *this_host = &status_info->hosts[i];
-
-			if (this_host->monList) {
-				this_host->notifyReqd = TRUE;
-				this_host->monList = NULL;
-			}
-		}
-		/* Select the next higher even number for the state counter */
-		status_info->ourState =
-		    (status_info->ourState + 2) & 0xfffffffe;
-		status_info->ourState++;	/* XXX - ??? */
-	}
+	reset_database();
+	return;
 }

-/* notify_one_host --------------------------------------------------------- */
+/* reset_database --------------------------------------------------------- */
 /*
- * Purpose:	Perform SM_NOTIFY procedure at specified host
- * Returns:	TRUE if success, FALSE if failed.
+ * Purpose:	Apply reset_host() to every host record and advance our state number
+ * Returns:	Nothing
+ * Notes:	If this is not called on reset, it will leak memory.
 */
+void
+reset_database()
+{
+	time_t now = time(NULL);
+	walk_db(reset_host, &now);
+
+	/* Select the next higher even number for the state counter */
+	status_info.ourState =
+	    (status_info.ourState + 2) & 0xfffffffe;
+	status_info.ourState++;	/* XXX - ??? */
+	sync_file();
+}
+
+/* unmon_hosts --------------------------------------------------------- */
+/*
+ * Purpose:	Unmonitor all the hosts
+ * Returns:	Nothing
+ * Notes:
+ */
+void
+unmon_hosts()
+{
+	time_t now = time(NULL);
+	walk_db(unmon_host, &now);
+	sync_file();
+}
+
 static int 
 notify_one_host(hostname)
 	char *hostname;
@@ -353,7 +593,7 @@ notify_one_host(hostname)
 	gethostname(our_hostname, sizeof(our_hostname));
 	our_hostname[SM_MAXSTRLEN] = '\0';
 	arg.mon_name = our_hostname;
-	arg.state = status_info->ourState;
+	arg.state = status_info.ourState;

 	if (debug)
 		syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s",
@@ -376,81 +616,11 @@ notify_one_host(hostname)
 	return (TRUE);
 }

-/* notify_hosts ------------------------------------------------------------ */
-/*
- * Purpose:	Send SM_NOTIFY to all hosts marked as requiring it
- * Returns:	Nothing, immediately - forks a process to do the work.
- * Notes:	Does nothing if there are no monitored hosts.
- *		Called after all the initialisation has been done -
- *		logs to syslog.
- */
-void 
-notify_hosts(void)
+
+static void
+die(n)
+	int n;
 {
-	HostInfo *hp;
-	int i, attempts;
-	int work_to_do = FALSE;
-	pid_t pid;
-
-	/* First check if there is in fact any work to do. */
-	for (i = status_info->noOfHosts, hp = status_info->hosts; i;
-	    i--, hp++) {
-		if (hp->notifyReqd) {
-			work_to_do = TRUE;
-			break;
-		}
-	}
-
-	if (!work_to_do)
-		return;		/* No work found */
-
-	pid = fork();
-	if (pid == -1) {
-		syslog(LOG_ERR, "Unable to fork notify process - %s",
-		    strerror(errno));
-		return;
-	}
-	if (pid)
-		return;
-
-	/*
-	 * Here in the child process.  We continue until all the hosts marked
-	 * as requiring notification have been duly notified.
-	 * If one of the initial attempts fails, we sleep for a while and
-	 * have another go.  This is necessary because when we have crashed,
-	 * (eg. a power outage) it is quite possible that we won't be able to
-	 * contact all monitored hosts immediately on restart, either because
-	 * they crashed too and take longer to come up (in which case the
-	 * notification isn't really required), or more importantly if some
-	 * router etc. needed to reach the monitored host has not come back
-	 * up yet.  In this case, we will be a bit late in re-establishing
-	 * locks (after the grace period) but that is the best we can do.
-	 * We try 10 times at 5 sec intervals, 10 more times at 1 minute
-	 * intervals, then 24 more times at hourly intervals, finally
-	 * giving up altogether if the host hasn't come back to life after
-	 * 24 hours.
-	 */
-	for (attempts = 0; attempts < 44; attempts++) {
-		work_to_do = FALSE;	/* Unless anything fails */
-		for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
-		    i--, hp++) {
-			if (hp->notifyReqd) {
-				if (notify_one_host(hp->hostname)) {
-					hp->notifyReqd = FALSE;
-					sync_file();
-				} else
-					work_to_do = TRUE;
-			}
-		}
-		if (!work_to_do)
-			break;
-		if (attempts < 10)
-			sleep(5);
-		else
-			if (attempts < 20)
-				sleep(60);
-			else
-				sleep(60 * 60);
-	}
-	exit(0);
+	(*db->close)(db);
+	exit(n);
 }
--- a/usr.sbin/rpc.statd/statd.h
+++ b/usr.sbin/rpc.statd/statd.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: statd.h,v 1.1 1997/03/10 06:28:32 scottr Exp $	*/
+/*	$NetBSD: statd.h,v 1.2 1997/10/21 20:38:19 christos Exp $	*/

 /*
 * Copyright (c) 1995
@@ -52,7 +52,7 @@
 *
 * We handle this by keeping the list of monitored hosts in a file
 * (/var/statd.state) which is mmap()ed and whose format is described
- * by the typedef FileLayout.  The lists of client callbacks are chained
+ * by the typedef Header.  The lists of client callbacks are chained
 * off this structure, but are held in normal memory and so will be
 * lost after a re-boot.  Hence the actual values of MonList * pointers
 * in the copy on disc have no significance, but their NULL/non-NULL
@@ -70,9 +70,9 @@ typedef struct MonList_s {
 }       MonList;

 typedef struct {
-	char    hostname[SM_MAXSTRLEN + 1]; /* Name of monitored host */
-	int     notifyReqd;	/* TRUE if we've crashed and not yet
+	int     notifyReqd;	/* Time of our next attempt or 0
 				   informed the monitored host */
+	int	attempts;	/* Number of attempts we tried so far */
 	MonList *monList;	/* List of clients to inform if we
 				   hear that the monitored host has
 				   crashed, NULL if no longer monitored	 */
@@ -82,23 +82,39 @@ typedef struct {
 /* Overall file layout. */

 typedef struct {
+	int	magic;		/* Zero magic */
 	int	ourState;	/* State number as defined in statd protocol */
-	int	noOfHosts;	/* Number of elements in hosts[] */
-	char	reserved[248];	/* Reserved for future use */
-	HostInfo hosts[1];	/* vector of monitored hosts */
-}       FileLayout;
-#define	HEADER_LEN (sizeof(FileLayout) - sizeof(HostInfo))
+}       Header;

 /* ------------------------------------------------------------------------- */

 /* Global variables */

-extern FileLayout *status_info;	/* The mmap()ed status file */
 extern int	debug;		/* = 1 to enable diagnostics to syslog */
+extern struct sigaction sa;
+extern Header status_info;

 /* Function prototypes */

-extern HostInfo	*find_host __P((char *hostname, int create));
-extern void	init_file __P((char *filename));
-extern void	notify_hosts __P((void));
-extern void	sync_file __P((void));
+/* stat_proc.c */
+struct sm_stat_res *sm_stat_1_svc __P((sm_name *, struct svc_req *));
+struct sm_stat_res *sm_mon_1_svc __P((mon *, struct svc_req *));
+struct sm_stat *sm_unmon_1_svc __P((mon_id *, struct svc_req *));
+struct sm_stat *sm_unmon_all_1_svc __P((my_id *, struct svc_req *));
+void *sm_simu_crash_1_svc __P((void *, struct svc_req *));
+void *sm_notify_1_svc __P((stat_chge *, struct svc_req *));
+int	do_unmon __P((char *, HostInfo *, void *));
+
+/* statd.c */
+void notify_handler __P((int));
+void sync_file __P((void));
+void unmon_hosts __P((void));
+void change_host __P((char *, HostInfo *));
+HostInfo *find_host __P((char *, HostInfo *));
+void reset_database __P((void));
+
+void sm_prog_1 __P((struct svc_req *, SVCXPRT *));
+
+#define NO_ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = SIG_IGN, sigaction(SIGALRM, &sa, NULL))
+#define ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = notify_handler, sigaction(SIGALRM, &sa, NULL))
+#define CLR_ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = SIG_DFL, sigaction(SIGALRM, &sa, NULL))