From 1175f55b11742a486f34723a121b6e892b3eecae Mon Sep 17 00:00:00 2001 From: christos Date: Tue, 21 Oct 1997 20:38:03 +0000 Subject: [PATCH] Use a db(3) hash database instead of shared memory. Don't fork() to service requests in the database when restarting. --- usr.sbin/rpc.statd/stat_proc.c | 119 +++--- usr.sbin/rpc.statd/statd.c | 646 +++++++++++++++++++++------------ usr.sbin/rpc.statd/statd.h | 44 ++- 3 files changed, 495 insertions(+), 314 deletions(-) diff --git a/usr.sbin/rpc.statd/stat_proc.c b/usr.sbin/rpc.statd/stat_proc.c index eca519a5a913..dec8bef6eceb 100644 --- a/usr.sbin/rpc.statd/stat_proc.c +++ b/usr.sbin/rpc.statd/stat_proc.c @@ -1,4 +1,4 @@ -/* $NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $ */ +/* $NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $ */ /* * Copyright (c) 1995 @@ -35,7 +35,7 @@ #include #ifndef lint -__RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $"); +__RCSID("$NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $"); #endif #include @@ -44,14 +44,13 @@ __RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $"); #include #include #include +#include #include #include #include "statd.h" -static int do_unmon __P((HostInfo *, my_id *)); - /* sm_stat_1 --------------------------------------------------------------- */ /* * Purpose: RPC call to enquire if a host can be monitored @@ -65,6 +64,7 @@ sm_stat_1_svc(arg, req) { static sm_stat_res res; + NO_ALARM; if (debug) syslog(LOG_DEBUG, "stat called for host %s", arg->mon_name); @@ -76,7 +76,8 @@ sm_stat_1_svc(arg, req) res.res_stat = stat_fail; } - res.state = status_info->ourState; + res.state = status_info.ourState; + ALARM; return (&res); } @@ -94,9 +95,10 @@ sm_mon_1_svc(arg, req) struct svc_req *req; { static sm_stat_res res; - HostInfo *hp; + HostInfo *hp, h; MonList *lp; + NO_ALARM; if (debug) { syslog(LOG_DEBUG, "monitor request for host %s", arg->mon_id.mon_name); @@ -105,35 +107,40 @@ sm_mon_1_svc(arg, req) arg->mon_id.my_id.my_vers, arg->mon_id.my_id.my_proc); } res.res_stat = stat_fail; /* Assume fail until set otherwise */ - res.state = status_info->ourState; + res.state = status_info.ourState; /* * Find existing host entry, or create one if not found. If * find_host() fails, it will have logged the error already. */ - if (!gethostbyname(arg->mon_id.mon_name)) + if (!gethostbyname(arg->mon_id.mon_name)) { syslog(LOG_ERR, "Invalid hostname to sm_mon: %s", arg->mon_id.mon_name); - else if ((hp = find_host(arg->mon_id.mon_name, TRUE)) != NULL) { - lp = (MonList *)malloc(sizeof(MonList)); - if (!lp) - syslog(LOG_ERR, "Out of memory"); - else { - strncpy(lp->notifyHost, arg->mon_id.my_id.my_name, - SM_MAXSTRLEN); - lp->notifyProg = arg->mon_id.my_id.my_prog; - lp->notifyVers = arg->mon_id.my_id.my_vers; - lp->notifyProc = arg->mon_id.my_id.my_proc; - memcpy(lp->notifyData, arg->priv, - sizeof(lp->notifyData)); - - lp->next = hp->monList; - hp->monList = lp; - sync_file(); - - res.res_stat = stat_succ; /* Report success */ - } + return &res; } + + if ((hp = find_host(arg->mon_id.mon_name, &h)) == NULL) + memset(hp = &h, 0, sizeof(h)); + + lp = (MonList *)malloc(sizeof(MonList)); + if (!lp) + syslog(LOG_ERR, "Out of memory"); + else { + strncpy(lp->notifyHost, arg->mon_id.my_id.my_name, + SM_MAXSTRLEN); + lp->notifyProg = arg->mon_id.my_id.my_prog; + lp->notifyVers = arg->mon_id.my_id.my_vers; + lp->notifyProc = arg->mon_id.my_id.my_proc; + memcpy(lp->notifyData, arg->priv, + sizeof(lp->notifyData)); + + lp->next = hp->monList; + hp->monList = lp; + change_host(arg->mon_id.mon_name, hp); + sync_file(); + res.res_stat = stat_succ; /* Report success */ + } + ALARM; return (&res); } @@ -145,11 +152,13 @@ sm_mon_1_svc(arg, req) * In the unlikely event of more than one identical monitor * request, all are removed. */ -static int -do_unmon(hp, idp) +int +do_unmon(name, hp, ptr) + char *name; HostInfo *hp; - my_id *idp; + void *ptr; { + my_id *idp = ptr; MonList *lp, *next; MonList *last = NULL; int result = FALSE; @@ -190,8 +199,9 @@ sm_unmon_1_svc(arg, req) struct svc_req *req; { static sm_stat res; - HostInfo *hp; + HostInfo *hp, h; + NO_ALARM; if (debug) { syslog(LOG_DEBUG, "un-monitor request for host %s", arg->mon_name); @@ -199,9 +209,11 @@ sm_unmon_1_svc(arg, req) arg->my_id.my_name, arg->my_id.my_prog, arg->my_id.my_vers, arg->my_id.my_proc); } - if ((hp = find_host(arg->mon_name, FALSE)) != NULL) { - if (do_unmon(hp, &arg->my_id)) + if ((hp = find_host(arg->mon_name, &h)) != NULL) { + if (do_unmon(arg->mon_name, hp, &arg->my_id)) { + change_host(arg->mon_name, hp); sync_file(); + } else syslog(LOG_ERR, "unmon request from %s, no matching monitor", @@ -210,7 +222,8 @@ sm_unmon_1_svc(arg, req) syslog(LOG_ERR, "unmon request from %s for unknown host %s", arg->my_id.my_name, arg->mon_name); - res.state = status_info->ourState; + res.state = status_info.ourState; + ALARM; return (&res); } @@ -228,21 +241,19 @@ sm_unmon_all_1_svc(arg, req) struct svc_req *req; { static sm_stat res; - HostInfo *hp; - int i; + NO_ALARM; if (debug) { syslog(LOG_DEBUG, "unmon_all for host: %s prog: %d ver: %d proc: %d", arg->my_name, arg->my_prog, arg->my_vers, arg->my_proc); } - for (i = status_info->noOfHosts, hp = status_info->hosts; i; i--, hp++) - do_unmon(hp, arg); - + unmon_hosts(); sync_file(); - res.state = status_info->ourState; + res.state = status_info.ourState; + ALARM; return (&res); } @@ -266,30 +277,14 @@ sm_simu_crash_1_svc(v, req) struct svc_req *req; { static char dummy; - int work_to_do; - HostInfo *hp; - int i; - work_to_do = 0; + NO_ALARM; if (debug) syslog(LOG_DEBUG, "simu_crash called!!"); - /* - * Simulate crash by setting notify-required flag on all monitored - * hosts, and incrementing our status number. notify_hosts() is - * then called to fork a process to do the notifications. - */ - for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0; - i--, hp++) { - if (hp->monList) { - work_to_do = TRUE; - hp->notifyReqd = TRUE; - } - } - status_info->ourState += 2; /* always even numbers if not crashed */ - - if (work_to_do) - notify_hosts(); + reset_database(); + ALARM; + notify_handler(0); return (&dummy); } @@ -319,14 +314,14 @@ sm_notify_1_svc(arg, req) static char dummy; status tx_arg; /* arg sent to callback procedure */ MonList *lp; - HostInfo *hp; + HostInfo *hp, h; pid_t pid; if (debug) syslog(LOG_DEBUG, "notify from host %s, new state %d", arg->mon_name, arg->state); - hp = find_host(arg->mon_name, FALSE); + hp = find_host(arg->mon_name, &h); if (!hp) { /* Never heard of this host - why is it notifying us? */ syslog(LOG_ERR, "Unsolicited notification from host %s", diff --git a/usr.sbin/rpc.statd/statd.c b/usr.sbin/rpc.statd/statd.c index b2b1b503aaee..c61942cdb764 100644 --- a/usr.sbin/rpc.statd/statd.c +++ b/usr.sbin/rpc.statd/statd.c @@ -1,6 +1,7 @@ -/* $NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $ */ +/* $NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $ */ /* + * Copyright (c) 1997 Christos Zoulas. All rights reserved. * Copyright (c) 1995 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. * @@ -15,6 +16,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the FreeBSD project + * This product includes software developed by Christos Zoulas. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -35,7 +37,7 @@ #include #ifndef lint -__RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $"); +__RCSID("$NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $"); #endif @@ -43,11 +45,8 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $"); /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ /* The actual program logic is in the file procs.c */ -#include -#include -#include - #include +#include #include #include #include @@ -56,21 +55,39 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $"); #include #include #include +#include #include #include "statd.h" +struct sigaction sa; int debug = 0; /* Controls syslog() for debug msgs */ int _rpcsvcdirty = 0; /* XXX ??? */ -FileLayout *status_info; /* Pointer to mmap()ed status file */ -static int status_fd; /* File descriptor for the open file */ -static off_t status_file_len; /* Current on-disc length of file */ +static DB *db; /* Database file */ - int main __P((int, char **)); -static void handle_sigchld __P((int)); -static int notify_one_host __P((char *)); -extern void sm_prog_1 __P((struct svc_req *, SVCXPRT *)); +Header status_info; + +static char undefdata[] = "\0\1\2\3\4\5\6\7"; +static DBT undefkey = { + undefdata, + sizeof(undefdata) +}; +extern char *__progname; + + +/* statd.c */ +static int walk_one __P((int (*fun )__P ((DBT *, DBT *, void *)), DBT *, DBT *, void *)); +static int walk_db __P((int (*fun )__P ((DBT *, DBT *, void *)), void *)); +static int reset_host __P((DBT *, DBT *, void *)); +static int check_work __P((DBT *, DBT *, void *)); +static int unmon_host __P((DBT *, DBT *, void *)); +static int notify_one __P((DBT *, DBT *, void *)); +static void init_file __P((char *)); +static int notify_one_host __P((char *)); +static void die __P((int)) __attribute__((__noreturn__)); + +int main __P((int, char **)); int main(argc, argv) @@ -78,7 +95,6 @@ main(argc, argv) char **argv; { SVCXPRT *transp; - struct sigaction sa; int ch; while ((ch = getopt(argc, argv, "d")) != (-1)) { @@ -88,7 +104,7 @@ main(argc, argv) break; default: case '?': - errx(1, "usage: rpc.statd [-d]"); + (void) fprintf(stderr, "Usage: %s [-d]", __progname); /* NOTREACHED */ } } @@ -118,35 +134,40 @@ main(argc, argv) * Note that it is NOT sensible to run this program from inetd - the * protocol assumes that it will run immediately at boot time. */ - daemon(0, 0); + if (!debug) + daemon(0, 0); openlog("rpc.statd", 0, LOG_DAEMON); if (debug) syslog(LOG_INFO, "Starting - debug enabled"); else syslog(LOG_INFO, "Starting"); - /* Install signal handler to collect exit status of child processes */ - sa.sa_handler = handle_sigchld; + sa.sa_handler = die; + sa.sa_flags = 0; sigemptyset(&sa.sa_mask); - sigaddset(&sa.sa_mask, SIGCHLD); + (void)sigaction(SIGTERM, &sa, NULL); + (void)sigaction(SIGQUIT, &sa, NULL); + (void)sigaction(SIGHUP, &sa, NULL); + (void)sigaction(SIGINT, &sa, NULL); + + sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART; - sigaction(SIGCHLD, &sa, NULL); + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGALRM); /* Initialisation now complete - start operating */ - /* - * notify_hosts() forks a process (if necessary) to do the - * SM_NOTIFY calls, which may be slow. - */ - notify_hosts(); + /* Notify hosts that need it */ + notify_handler(0); - svc_run(); /* Should never return */ - exit(1); + while (1) + svc_run(); /* Should never return */ + die(0); } -/* handle_sigchld ---------------------------------------------------------- */ +/* notify_handler ---------------------------------------------------------- */ /* - * Purpose: Catch SIGCHLD and collect process status + * Purpose: Catch SIGALRM and collect process status * Returns: Nothing. * Notes: No special action required, other than to collect the * process status and hence allow the child to die: @@ -154,19 +175,30 @@ main(argc, argv) * of SM_NOTIFY to other systems, so it is normal for the * children to exit when they have done their work. */ -static void -handle_sigchld(sig) +void +notify_handler(sig) int sig; { - int pid, status; - pid = wait4(-1, &status, WNOHANG, (struct rusage *) 0); - if (!pid) - syslog(LOG_ERR, "Phantom SIGCHLD??"); - else if (status) - syslog(LOG_ERR, "Child %d failed with status %d", pid, - WEXITSTATUS(status)); - else if (debug) - syslog(LOG_DEBUG, "Child %d exited OK", pid); + time_t now; + + NO_ALARM; + sa.sa_handler = SIG_IGN; + (void)sigaction(SIGALRM, &sa, NULL); + + now = time(NULL); + + (void) walk_db(notify_one, &now); + + if (walk_db(check_work, &now) == 0) { + /* + * No more work to be done. + */ + CLR_ALARM; + return; + } + sync_file(); + ALARM; + alarm(5); } /* sync_file --------------------------------------------------------------- */ @@ -177,72 +209,296 @@ handle_sigchld(sig) void sync_file() { - if (msync((caddr_t)status_info, 0, 0) < 0) - syslog(LOG_ERR, "msync() failed: %s", strerror(errno)); + DBT data; + + data.data = &status_info; + data.size = sizeof(status_info); + switch ((*db->put)(db, &undefkey, &data, 0)) { + case 0: + return; + case -1: + goto bad; + default: + abort(); + } + if ((*db->sync)(db, 0) == -1) { +bad: + syslog(LOG_ERR, "database corrupted %m"); + die(1); + } } +/* change_host -------------------------------------------------------------- */ +/* + * Purpose: Update/Create an entry for host + * Returns: Nothing + * Notes: + * + */ +void +change_host(hostname, hp) + char *hostname; + HostInfo *hp; +{ + DBT key, data; + char *ptr; + + for (ptr = hostname; *ptr; ptr++) + if (isupper((unsigned char) *ptr)) + *ptr = tolower((unsigned char) *ptr); + + key.data = hostname; + key.size = ptr - hostname + 1; + data.data = hp; + data.size = sizeof(*hp); + + switch ((*db->put)(db, &key, &data, 0)) { + case -1: + syslog(LOG_ERR, "database corrupted %m"); + die(1); + case 0: + return; + default: + abort(); + } +} + + /* find_host -------------------------------------------------------------- */ /* * Purpose: Find the entry in the status file for a given host - * Returns: Pointer to that entry in the mmap() region, or NULL. - * Notes: Also creates entries if requested. - * Failure to create also returns NULL. + * Returns: Copy of entry in hd, or NULL + * Notes: + * */ HostInfo * -find_host(hostname, create) +find_host(hostname, hp) char *hostname; - int create; -{ HostInfo *hp; - HostInfo *spare_slot = NULL; - HostInfo *result = NULL; - int i; +{ + DBT key, data; + char *ptr; - for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts; - i++, hp++) { - if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN)) { - result = hp; + for (ptr = hostname; *ptr; ptr++) + if (isupper((unsigned char) *ptr)) + *ptr = tolower((unsigned char) *ptr); + + key.data = hostname; + key.size = ptr - hostname + 1; + switch ((*db->get)(db, &key, &data, 0)) { + case 0: + if (data.size != sizeof(*hp)) + goto bad; + return memcpy(hp, data.data, sizeof(*hp)); + case 1: + return NULL; + case -1: + goto bad; + default: + abort(); + } + +bad: + syslog(LOG_ERR, "Database corrupted %m"); + return NULL; +} + +/* walk_one ------------------------------------------------------------- */ +/* + * Purpose: Call the given function if the element is valid + * Returns: Nothing - exits on error + * Notes: + */ +static int +walk_one(fun, key, data, ptr) + int (*fun) __P((DBT *, DBT *, void *)); + DBT *key, *data; + void *ptr; +{ + if (key->size == undefkey.size && + memcmp(key->data, undefkey.data, key->size) == 0) + return 0; + if (data->size != sizeof(HostInfo)) { + syslog(LOG_ERR, "Bad data in database"); + die(1); + } + + return (*fun)(key, data, ptr); +} + +/* walk_db -------------------------------------------------------------- */ +/* + * Purpose: Iterate over all elements calling the given function + * Returns: -1 if function failed, 0 on success + * Notes: + */ +static int +walk_db(fun, ptr) + int (*fun) __P((DBT *, DBT *, void *)); + void *ptr; +{ + DBT key, data; + + switch ((*db->seq)(db, &key, &data, R_FIRST)) { + case -1: + goto bad; + case 1: + /* We should have at least the magic entry at this point */ + abort(); + case 0: + if (walk_one(fun, &key, &data, ptr) == -1) + return -1; + break; + default: + abort(); + } + + + for (;;) + switch ((*db->seq)(db, &key, &data, R_NEXT)) { + case -1: + goto bad; + case 1: + if (walk_one(fun, &key, &data, ptr) == -1) + return -1; break; + case 0: + return 0; + default: + abort(); } - if (!spare_slot && !hp->monList && !hp->notifyReqd) - spare_slot = hp; +bad: + syslog(LOG_ERR, "Corrupted database %m"); + die(1); +} + +/* reset_host ------------------------------------------------------------ */ +/* + * Purpose: Clean up existing hosts in file. + * Returns: Always success 0. + * Notes: Clean-up of existing file - monitored hosts will have a + * pointer to a list of clients, which refers to memory in + * the previous incarnation of the program and so are + * meaningless now. These pointers are zeroed and the fact + * that the host was previously monitored is recorded by + * setting the notifyReqd flag, which will in due course + * cause a SM_NOTIFY to be sent. + * + * Note that if we crash twice in quick succession, some hosts + * may already have notifyReqd set, where we didn't manage to + * notify them before the second crash occurred. + */ +static int +reset_host(key, data, ptr) + DBT *key, *data; + void *ptr; +{ + HostInfo *hi = data->data; + + if (hi->monList) { + hi->notifyReqd = *(time_t *) data; + hi->attempts = 0; + hi->monList = NULL; } + return 0; +} - /* Return if entry found, or if not asked to create one. */ - if (result || !create) - return (result); +/* check_work ------------------------------------------------------------ */ +/* + * Purpose: Check if there is work to be done. + * Returns: 0 if there is no work to be done -1 if there is. + * Notes: + */ +static int +check_work(key, data, ptr) + DBT *key, *data; + void *ptr; +{ + HostInfo *hi = data->data; - /* - * Now create an entry, using the spare slot if one was found or - * adding to the end of the list otherwise, extending file if req'd - */ - if (!spare_slot) { - off_t desired_size; - spare_slot = &status_info->hosts[status_info->noOfHosts]; - desired_size = ((char *)spare_slot - (char *)status_info) + - sizeof(HostInfo); + return hi->notifyReqd ? -1 : 0; +} - if (desired_size > status_file_len) { - /* Extend file by writing 1 byte of junk at the - * desired end pos */ - lseek(status_fd, desired_size - 1, SEEK_SET); - i = write(status_fd, &i, 1); - if (i < 1) { - syslog(LOG_ERR, "Unable to extend status file"); - return (NULL); - } - status_file_len = desired_size; +/* unmon_host ------------------------------------------------------------ */ +/* + * Purpose: Unmonitor a host + * Returns: 0 + * Notes: + */ +static int +unmon_host(key, data, ptr) + DBT *key, *data; + void *ptr; +{ + char *name = key->data; + HostInfo *hi = data->data; + + if (do_unmon(name, hi, ptr)) + change_host(name, hi); + return 0; +} + +/* notify_one ------------------------------------------------------------ */ +/* + * Purpose: Notify one host. + * Returns: 0 if success -1 on failure + * Notes: + */ +static int +notify_one(key, data, ptr) + DBT *key, *data; + void *ptr; +{ + time_t now = *(time_t *) ptr; + char *name = key->data; + HostInfo *hi = data->data; + + if (hi->notifyReqd == 0 || hi->notifyReqd > now) + return 0; + + if (notify_one_host(name)) { +give_up: + hi->notifyReqd = 0; + hi->attempts = 0; + switch ((*db->put)(db, key, data, 0)) { + case -1: + syslog(LOG_ERR, "Error storing %s (%m)", name); + case 0: + return 0; + + default: + abort(); } - status_info->noOfHosts++; } - /* - * Initialise the spare slot that has been found/created - * Note that we do not msync(), since the caller is presumed to be - * about to modify the entry further - */ - memset(spare_slot, 0, sizeof(HostInfo)); - strncpy(spare_slot->hostname, hostname, SM_MAXSTRLEN); - return (spare_slot); + else { + /* + * If one of the initial attempts fails, we wait + * for a while and have another go. This is necessary + * because when we have crashed, (eg. a power outage) + * it is quite possible that we won't be able to + * contact all monitored hosts immediately on restart, + * either because they crashed too and take longer + * to come up (in which case the notification isn't + * really required), or more importantly if some + * router etc. needed to reach the monitored host + * has not come back up yet. In this case, we will + * be a bit late in re-establishing locks (after the + * grace period) but that is the best we can do. We + * try 10 times at 5 sec intervals, 10 more times at + * 1 minute intervals, then 24 more times at hourly + * intervals, finally giving up altogether if the + * host hasn't come back to life after 24 hours. + */ + if (hi->attempts++ >= 44) + goto give_up; + else if (hi->attempts < 10) + hi->notifyReqd += 5; + else if (hi->attempts < 20) + hi->notifyReqd += 60; + else + hi->notifyReqd += 60 * 60; + return -1; + } } /* init_file -------------------------------------------------------------- */ @@ -257,89 +513,73 @@ find_host(hostname, create) * all hosts that had a monitor list, and incrementing * the state number to the next even value. */ -void +static void init_file(filename) char *filename; { - char buf[HEADER_LEN]; - int new_file = FALSE; - int i; + DBT data; - /* try to open existing file - if not present, create one */ - status_fd = open(filename, O_RDWR); - if ((status_fd < 0) && (errno == ENOENT)) { - status_fd = open(filename, O_RDWR | O_CREAT, 0644); - new_file = TRUE; - } - if (status_fd < 0) { - err(1, "unable to open status file %s", filename); - /* NOTREACHED */ + db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 644, DB_HASH, + NULL); + if (db == NULL) + err(1, "Cannot open `%s'", filename); + + switch ((*db->get)(db, &undefkey, &data, 0)) { + case 1: + /* New database */ + (void)memset(&status_info, 0, sizeof(status_info)); + sync_file(); + return; + + case -1: + err(1, "error accessing database (%m)"); + case 0: + /* Existing database */ + if (data.size != sizeof(status_info)) + errx(1, "database corrupted %d != %d", + data.size, sizeof(status_info)); + break; + default: + abort(); } - /* - * File now open. mmap() it, with a generous size to allow for - * later growth, where we will extend the file but not re-map it. - */ - status_info = (FileLayout *)mmap(NULL, 0x1000000, - PROT_READ | PROT_WRITE, MAP_SHARED, status_fd, 0); - - if (status_info == (FileLayout *)(-1)) { - perror("rpc.statd"); - fprintf(stderr, "Unable to mmap() status file\n"); - } - status_file_len = lseek(status_fd, 0L, SEEK_END); - - /* - * If the file was not newly created, validate the contents, and if - * defective, re-create from scratch. - */ - if (!new_file) { - if ((status_file_len < HEADER_LEN) || (status_file_len < - (HEADER_LEN + sizeof(HostInfo) * status_info->noOfHosts))) { - fprintf(stderr, "rpc.statd: status file is corrupt\n"); - new_file = TRUE; - } - } - /* Initialisation of a new, empty file. */ - if (new_file) { - memset(buf, 0, sizeof(buf)); - lseek(status_fd, 0L, SEEK_SET); - write(status_fd, buf, HEADER_LEN); - status_file_len = HEADER_LEN; - } else { - /* - * Clean-up of existing file - monitored hosts will have a - * pointer to a list of clients, which refers to memory in - * the previous incarnation of the program and so are - * meaningless now. These pointers are zeroed and the fact - * that the host was previously monitored is recorded by - * setting the notifyReqd flag, which will in due course - * cause a SM_NOTIFY to be sent. - * - * Note that if we crash twice in quick succession, some hosts - * may already have notifyReqd set, where we didn't manage to - * notify them before the second crash occurred. - */ - for (i = 0; i < status_info->noOfHosts; i++) { - HostInfo *this_host = &status_info->hosts[i]; - - if (this_host->monList) { - this_host->notifyReqd = TRUE; - this_host->monList = NULL; - } - } - /* Select the next higher even number for the state counter */ - status_info->ourState = - (status_info->ourState + 2) & 0xfffffffe; - status_info->ourState++; /* XXX - ??? */ - } + reset_database(); + return; } -/* notify_one_host --------------------------------------------------------- */ +/* reset_database --------------------------------------------------------- */ /* - * Purpose: Perform SM_NOTIFY procedure at specified host - * Returns: TRUE if success, FALSE if failed. + * Purpose: Clears the statd database + * Returns: Nothing + * Notes: If this is not called on reset, it will leak memory. */ +void +reset_database() +{ + time_t now = time(NULL); + walk_db(reset_host, &now); + + /* Select the next higher even number for the state counter */ + status_info.ourState = + (status_info.ourState + 2) & 0xfffffffe; + status_info.ourState++; /* XXX - ??? */ + sync_file(); +} + +/* unmon_hosts --------------------------------------------------------- */ +/* + * Purpose: Unmonitor all the hosts + * Returns: Nothing + * Notes: + */ +void +unmon_hosts() +{ + time_t now = time(NULL); + walk_db(unmon_host, &now); + sync_file(); +} + static int notify_one_host(hostname) char *hostname; @@ -353,7 +593,7 @@ notify_one_host(hostname) gethostname(our_hostname, sizeof(our_hostname)); our_hostname[SM_MAXSTRLEN] = '\0'; arg.mon_name = our_hostname; - arg.state = status_info->ourState; + arg.state = status_info.ourState; if (debug) syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", @@ -376,81 +616,11 @@ notify_one_host(hostname) return (TRUE); } -/* notify_hosts ------------------------------------------------------------ */ -/* - * Purpose: Send SM_NOTIFY to all hosts marked as requiring it - * Returns: Nothing, immediately - forks a process to do the work. - * Notes: Does nothing if there are no monitored hosts. - * Called after all the initialisation has been done - - * logs to syslog. - */ -void -notify_hosts(void) + +static void +die(n) + int n; { - HostInfo *hp; - int i, attempts; - int work_to_do = FALSE; - pid_t pid; - - /* First check if there is in fact any work to do. */ - for (i = status_info->noOfHosts, hp = status_info->hosts; i; - i--, hp++) { - if (hp->notifyReqd) { - work_to_do = TRUE; - break; - } - } - - if (!work_to_do) - return; /* No work found */ - - pid = fork(); - if (pid == -1) { - syslog(LOG_ERR, "Unable to fork notify process - %s", - strerror(errno)); - return; - } - if (pid) - return; - - /* - * Here in the child process. We continue until all the hosts marked - * as requiring notification have been duly notified. - * If one of the initial attempts fails, we sleep for a while and - * have another go. This is necessary because when we have crashed, - * (eg. a power outage) it is quite possible that we won't be able to - * contact all monitored hosts immediately on restart, either because - * they crashed too and take longer to come up (in which case the - * notification isn't really required), or more importantly if some - * router etc. needed to reach the monitored host has not come back - * up yet. In this case, we will be a bit late in re-establishing - * locks (after the grace period) but that is the best we can do. - * We try 10 times at 5 sec intervals, 10 more times at 1 minute - * intervals, then 24 more times at hourly intervals, finally - * giving up altogether if the host hasn't come back to life after - * 24 hours. - */ - for (attempts = 0; attempts < 44; attempts++) { - work_to_do = FALSE; /* Unless anything fails */ - for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0; - i--, hp++) { - if (hp->notifyReqd) { - if (notify_one_host(hp->hostname)) { - hp->notifyReqd = FALSE; - sync_file(); - } else - work_to_do = TRUE; - } - } - if (!work_to_do) - break; - if (attempts < 10) - sleep(5); - else - if (attempts < 20) - sleep(60); - else - sleep(60 * 60); - } - exit(0); + (*db->close)(db); + exit(n); } diff --git a/usr.sbin/rpc.statd/statd.h b/usr.sbin/rpc.statd/statd.h index 6bfc6960fee5..252b25dd39dd 100644 --- a/usr.sbin/rpc.statd/statd.h +++ b/usr.sbin/rpc.statd/statd.h @@ -1,4 +1,4 @@ -/* $NetBSD: statd.h,v 1.1 1997/03/10 06:28:32 scottr Exp $ */ +/* $NetBSD: statd.h,v 1.2 1997/10/21 20:38:19 christos Exp $ */ /* * Copyright (c) 1995 @@ -52,7 +52,7 @@ * * We handle this by keeping the list of monitored hosts in a file * (/var/statd.state) which is mmap()ed and whose format is described - * by the typedef FileLayout. The lists of client callbacks are chained + * by the typedef Header. The lists of client callbacks are chained * off this structure, but are held in normal memory and so will be * lost after a re-boot. Hence the actual values of MonList * pointers * in the copy on disc have no significance, but their NULL/non-NULL @@ -70,9 +70,9 @@ typedef struct MonList_s { } MonList; typedef struct { - char hostname[SM_MAXSTRLEN + 1]; /* Name of monitored host */ - int notifyReqd; /* TRUE if we've crashed and not yet + int notifyReqd; /* Time of our next attempt or 0 informed the monitored host */ + int attempts; /* Number of attempts we tried so far */ MonList *monList; /* List of clients to inform if we hear that the monitored host has crashed, NULL if no longer monitored */ @@ -82,23 +82,39 @@ typedef struct { /* Overall file layout. */ typedef struct { + int magic; /* Zero magic */ int ourState; /* State number as defined in statd protocol */ - int noOfHosts; /* Number of elements in hosts[] */ - char reserved[248]; /* Reserved for future use */ - HostInfo hosts[1]; /* vector of monitored hosts */ -} FileLayout; -#define HEADER_LEN (sizeof(FileLayout) - sizeof(HostInfo)) +} Header; /* ------------------------------------------------------------------------- */ /* Global variables */ -extern FileLayout *status_info; /* The mmap()ed status file */ extern int debug; /* = 1 to enable diagnostics to syslog */ +extern struct sigaction sa; +extern Header status_info; /* Function prototypes */ -extern HostInfo *find_host __P((char *hostname, int create)); -extern void init_file __P((char *filename)); -extern void notify_hosts __P((void)); -extern void sync_file __P((void)); +/* stat_proc.c */ +struct sm_stat_res *sm_stat_1_svc __P((sm_name *, struct svc_req *)); +struct sm_stat_res *sm_mon_1_svc __P((mon *, struct svc_req *)); +struct sm_stat *sm_unmon_1_svc __P((mon_id *, struct svc_req *)); +struct sm_stat *sm_unmon_all_1_svc __P((my_id *, struct svc_req *)); +void *sm_simu_crash_1_svc __P((void *, struct svc_req *)); +void *sm_notify_1_svc __P((stat_chge *, struct svc_req *)); +int do_unmon __P((char *, HostInfo *, void *)); + +/* statd.c */ +void notify_handler __P((int)); +void sync_file __P((void)); +void unmon_hosts __P((void)); +void change_host __P((char *, HostInfo *)); +HostInfo *find_host __P((char *, HostInfo *)); +void reset_database __P((void)); + +void sm_prog_1 __P((struct svc_req *, SVCXPRT *)); + +#define NO_ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = SIG_IGN, sigaction(SIGALRM, &sa, NULL)) +#define ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = notify_handler, sigaction(SIGALRM, &sa, NULL)) +#define CLR_ALARM sa.sa_handler == SIG_DFL ? 0 : (sa.sa_handler = SIG_DFL, sigaction(SIGALRM, &sa, NULL))