Use a db(3) hash database instead of shared memory. Don't fork() to service
requests in the database when restarting.
This commit is contained in:
parent
890b485672
commit
1175f55b11
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $ */
|
||||
/* $NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995
|
||||
|
@ -35,7 +35,7 @@
|
|||
|
||||
#include <sys/cdefs.h>
|
||||
#ifndef lint
|
||||
__RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $");
|
||||
__RCSID("$NetBSD: stat_proc.c,v 1.4 1997/10/21 20:38:03 christos Exp $");
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
|
@ -44,14 +44,13 @@ __RCSID("$NetBSD: stat_proc.c,v 1.3 1997/10/17 16:12:48 lukem Exp $");
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <syslog.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <rpc/rpc.h>
|
||||
|
||||
#include "statd.h"
|
||||
|
||||
static int do_unmon __P((HostInfo *, my_id *));
|
||||
|
||||
/* sm_stat_1 --------------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: RPC call to enquire if a host can be monitored
|
||||
|
@ -65,6 +64,7 @@ sm_stat_1_svc(arg, req)
|
|||
{
|
||||
static sm_stat_res res;
|
||||
|
||||
NO_ALARM;
|
||||
if (debug)
|
||||
syslog(LOG_DEBUG, "stat called for host %s", arg->mon_name);
|
||||
|
||||
|
@ -76,7 +76,8 @@ sm_stat_1_svc(arg, req)
|
|||
res.res_stat = stat_fail;
|
||||
}
|
||||
|
||||
res.state = status_info->ourState;
|
||||
res.state = status_info.ourState;
|
||||
ALARM;
|
||||
return (&res);
|
||||
}
|
||||
|
||||
|
@ -94,9 +95,10 @@ sm_mon_1_svc(arg, req)
|
|||
struct svc_req *req;
|
||||
{
|
||||
static sm_stat_res res;
|
||||
HostInfo *hp;
|
||||
HostInfo *hp, h;
|
||||
MonList *lp;
|
||||
|
||||
NO_ALARM;
|
||||
if (debug) {
|
||||
syslog(LOG_DEBUG, "monitor request for host %s",
|
||||
arg->mon_id.mon_name);
|
||||
|
@ -105,35 +107,40 @@ sm_mon_1_svc(arg, req)
|
|||
arg->mon_id.my_id.my_vers, arg->mon_id.my_id.my_proc);
|
||||
}
|
||||
res.res_stat = stat_fail; /* Assume fail until set otherwise */
|
||||
res.state = status_info->ourState;
|
||||
res.state = status_info.ourState;
|
||||
|
||||
/*
|
||||
* Find existing host entry, or create one if not found. If
|
||||
* find_host() fails, it will have logged the error already.
|
||||
*/
|
||||
if (!gethostbyname(arg->mon_id.mon_name))
|
||||
if (!gethostbyname(arg->mon_id.mon_name)) {
|
||||
syslog(LOG_ERR, "Invalid hostname to sm_mon: %s",
|
||||
arg->mon_id.mon_name);
|
||||
else if ((hp = find_host(arg->mon_id.mon_name, TRUE)) != NULL) {
|
||||
lp = (MonList *)malloc(sizeof(MonList));
|
||||
if (!lp)
|
||||
syslog(LOG_ERR, "Out of memory");
|
||||
else {
|
||||
strncpy(lp->notifyHost, arg->mon_id.my_id.my_name,
|
||||
SM_MAXSTRLEN);
|
||||
lp->notifyProg = arg->mon_id.my_id.my_prog;
|
||||
lp->notifyVers = arg->mon_id.my_id.my_vers;
|
||||
lp->notifyProc = arg->mon_id.my_id.my_proc;
|
||||
memcpy(lp->notifyData, arg->priv,
|
||||
sizeof(lp->notifyData));
|
||||
|
||||
lp->next = hp->monList;
|
||||
hp->monList = lp;
|
||||
sync_file();
|
||||
|
||||
res.res_stat = stat_succ; /* Report success */
|
||||
}
|
||||
return &res;
|
||||
}
|
||||
|
||||
if ((hp = find_host(arg->mon_id.mon_name, &h)) == NULL)
|
||||
memset(hp = &h, 0, sizeof(h));
|
||||
|
||||
lp = (MonList *)malloc(sizeof(MonList));
|
||||
if (!lp)
|
||||
syslog(LOG_ERR, "Out of memory");
|
||||
else {
|
||||
strncpy(lp->notifyHost, arg->mon_id.my_id.my_name,
|
||||
SM_MAXSTRLEN);
|
||||
lp->notifyProg = arg->mon_id.my_id.my_prog;
|
||||
lp->notifyVers = arg->mon_id.my_id.my_vers;
|
||||
lp->notifyProc = arg->mon_id.my_id.my_proc;
|
||||
memcpy(lp->notifyData, arg->priv,
|
||||
sizeof(lp->notifyData));
|
||||
|
||||
lp->next = hp->monList;
|
||||
hp->monList = lp;
|
||||
change_host(arg->mon_id.mon_name, hp);
|
||||
sync_file();
|
||||
res.res_stat = stat_succ; /* Report success */
|
||||
}
|
||||
ALARM;
|
||||
return (&res);
|
||||
}
|
||||
|
||||
|
@ -145,11 +152,13 @@ sm_mon_1_svc(arg, req)
|
|||
* In the unlikely event of more than one identical monitor
|
||||
* request, all are removed.
|
||||
*/
|
||||
static int
|
||||
do_unmon(hp, idp)
|
||||
int
|
||||
do_unmon(name, hp, ptr)
|
||||
char *name;
|
||||
HostInfo *hp;
|
||||
my_id *idp;
|
||||
void *ptr;
|
||||
{
|
||||
my_id *idp = ptr;
|
||||
MonList *lp, *next;
|
||||
MonList *last = NULL;
|
||||
int result = FALSE;
|
||||
|
@ -190,8 +199,9 @@ sm_unmon_1_svc(arg, req)
|
|||
struct svc_req *req;
|
||||
{
|
||||
static sm_stat res;
|
||||
HostInfo *hp;
|
||||
HostInfo *hp, h;
|
||||
|
||||
NO_ALARM;
|
||||
if (debug) {
|
||||
syslog(LOG_DEBUG, "un-monitor request for host %s",
|
||||
arg->mon_name);
|
||||
|
@ -199,9 +209,11 @@ sm_unmon_1_svc(arg, req)
|
|||
arg->my_id.my_name, arg->my_id.my_prog,
|
||||
arg->my_id.my_vers, arg->my_id.my_proc);
|
||||
}
|
||||
if ((hp = find_host(arg->mon_name, FALSE)) != NULL) {
|
||||
if (do_unmon(hp, &arg->my_id))
|
||||
if ((hp = find_host(arg->mon_name, &h)) != NULL) {
|
||||
if (do_unmon(arg->mon_name, hp, &arg->my_id)) {
|
||||
change_host(arg->mon_name, hp);
|
||||
sync_file();
|
||||
}
|
||||
else
|
||||
syslog(LOG_ERR,
|
||||
"unmon request from %s, no matching monitor",
|
||||
|
@ -210,7 +222,8 @@ sm_unmon_1_svc(arg, req)
|
|||
syslog(LOG_ERR, "unmon request from %s for unknown host %s",
|
||||
arg->my_id.my_name, arg->mon_name);
|
||||
|
||||
res.state = status_info->ourState;
|
||||
res.state = status_info.ourState;
|
||||
ALARM;
|
||||
|
||||
return (&res);
|
||||
}
|
||||
|
@ -228,21 +241,19 @@ sm_unmon_all_1_svc(arg, req)
|
|||
struct svc_req *req;
|
||||
{
|
||||
static sm_stat res;
|
||||
HostInfo *hp;
|
||||
int i;
|
||||
|
||||
NO_ALARM;
|
||||
if (debug) {
|
||||
syslog(LOG_DEBUG,
|
||||
"unmon_all for host: %s prog: %d ver: %d proc: %d",
|
||||
arg->my_name, arg->my_prog, arg->my_vers, arg->my_proc);
|
||||
}
|
||||
|
||||
for (i = status_info->noOfHosts, hp = status_info->hosts; i; i--, hp++)
|
||||
do_unmon(hp, arg);
|
||||
|
||||
unmon_hosts();
|
||||
sync_file();
|
||||
|
||||
res.state = status_info->ourState;
|
||||
res.state = status_info.ourState;
|
||||
ALARM;
|
||||
|
||||
return (&res);
|
||||
}
|
||||
|
@ -266,30 +277,14 @@ sm_simu_crash_1_svc(v, req)
|
|||
struct svc_req *req;
|
||||
{
|
||||
static char dummy;
|
||||
int work_to_do;
|
||||
HostInfo *hp;
|
||||
int i;
|
||||
|
||||
work_to_do = 0;
|
||||
NO_ALARM;
|
||||
if (debug)
|
||||
syslog(LOG_DEBUG, "simu_crash called!!");
|
||||
|
||||
/*
|
||||
* Simulate crash by setting notify-required flag on all monitored
|
||||
* hosts, and incrementing our status number. notify_hosts() is
|
||||
* then called to fork a process to do the notifications.
|
||||
*/
|
||||
for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
|
||||
i--, hp++) {
|
||||
if (hp->monList) {
|
||||
work_to_do = TRUE;
|
||||
hp->notifyReqd = TRUE;
|
||||
}
|
||||
}
|
||||
status_info->ourState += 2; /* always even numbers if not crashed */
|
||||
|
||||
if (work_to_do)
|
||||
notify_hosts();
|
||||
reset_database();
|
||||
ALARM;
|
||||
notify_handler(0);
|
||||
|
||||
return (&dummy);
|
||||
}
|
||||
|
@ -319,14 +314,14 @@ sm_notify_1_svc(arg, req)
|
|||
static char dummy;
|
||||
status tx_arg; /* arg sent to callback procedure */
|
||||
MonList *lp;
|
||||
HostInfo *hp;
|
||||
HostInfo *hp, h;
|
||||
pid_t pid;
|
||||
|
||||
if (debug)
|
||||
syslog(LOG_DEBUG, "notify from host %s, new state %d",
|
||||
arg->mon_name, arg->state);
|
||||
|
||||
hp = find_host(arg->mon_name, FALSE);
|
||||
hp = find_host(arg->mon_name, &h);
|
||||
if (!hp) {
|
||||
/* Never heard of this host - why is it notifying us? */
|
||||
syslog(LOG_ERR, "Unsolicited notification from host %s",
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
/* $NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $ */
|
||||
/* $NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1997 Christos Zoulas. All rights reserved.
|
||||
* Copyright (c) 1995
|
||||
* A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved.
|
||||
*
|
||||
|
@ -15,6 +16,7 @@
|
|||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed for the FreeBSD project
|
||||
* This product includes software developed by Christos Zoulas.
|
||||
* 4. Neither the name of the author nor the names of any co-contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
@ -35,7 +37,7 @@
|
|||
|
||||
#include <sys/cdefs.h>
|
||||
#ifndef lint
|
||||
__RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
|
||||
__RCSID("$NetBSD: statd.c,v 1.9 1997/10/21 20:38:11 christos Exp $");
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -43,11 +45,8 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
|
|||
/* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */
|
||||
/* The actual program logic is in the file procs.c */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <err.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
|
@ -56,21 +55,39 @@ __RCSID("$NetBSD: statd.c,v 1.8 1997/10/21 13:33:23 is Exp $");
|
|||
#include <string.h>
|
||||
#include <syslog.h>
|
||||
#include <unistd.h>
|
||||
#include <db.h>
|
||||
|
||||
#include <rpc/rpc.h>
|
||||
|
||||
#include "statd.h"
|
||||
|
||||
struct sigaction sa;
|
||||
int debug = 0; /* Controls syslog() for debug msgs */
|
||||
int _rpcsvcdirty = 0; /* XXX ??? */
|
||||
FileLayout *status_info; /* Pointer to mmap()ed status file */
|
||||
static int status_fd; /* File descriptor for the open file */
|
||||
static off_t status_file_len; /* Current on-disc length of file */
|
||||
static DB *db; /* Database file */
|
||||
|
||||
int main __P((int, char **));
|
||||
static void handle_sigchld __P((int));
|
||||
static int notify_one_host __P((char *));
|
||||
extern void sm_prog_1 __P((struct svc_req *, SVCXPRT *));
|
||||
Header status_info;
|
||||
|
||||
static char undefdata[] = "\0\1\2\3\4\5\6\7";
|
||||
static DBT undefkey = {
|
||||
undefdata,
|
||||
sizeof(undefdata)
|
||||
};
|
||||
extern char *__progname;
|
||||
|
||||
|
||||
/* statd.c */
|
||||
static int walk_one __P((int (*fun )__P ((DBT *, DBT *, void *)), DBT *, DBT *, void *));
|
||||
static int walk_db __P((int (*fun )__P ((DBT *, DBT *, void *)), void *));
|
||||
static int reset_host __P((DBT *, DBT *, void *));
|
||||
static int check_work __P((DBT *, DBT *, void *));
|
||||
static int unmon_host __P((DBT *, DBT *, void *));
|
||||
static int notify_one __P((DBT *, DBT *, void *));
|
||||
static void init_file __P((char *));
|
||||
static int notify_one_host __P((char *));
|
||||
static void die __P((int)) __attribute__((__noreturn__));
|
||||
|
||||
int main __P((int, char **));
|
||||
|
||||
int
|
||||
main(argc, argv)
|
||||
|
@ -78,7 +95,6 @@ main(argc, argv)
|
|||
char **argv;
|
||||
{
|
||||
SVCXPRT *transp;
|
||||
struct sigaction sa;
|
||||
int ch;
|
||||
|
||||
while ((ch = getopt(argc, argv, "d")) != (-1)) {
|
||||
|
@ -88,7 +104,7 @@ main(argc, argv)
|
|||
break;
|
||||
default:
|
||||
case '?':
|
||||
errx(1, "usage: rpc.statd [-d]");
|
||||
(void) fprintf(stderr, "Usage: %s [-d]", __progname);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
}
|
||||
|
@ -118,35 +134,40 @@ main(argc, argv)
|
|||
* Note that it is NOT sensible to run this program from inetd - the
|
||||
* protocol assumes that it will run immediately at boot time.
|
||||
*/
|
||||
daemon(0, 0);
|
||||
if (!debug)
|
||||
daemon(0, 0);
|
||||
openlog("rpc.statd", 0, LOG_DAEMON);
|
||||
if (debug)
|
||||
syslog(LOG_INFO, "Starting - debug enabled");
|
||||
else
|
||||
syslog(LOG_INFO, "Starting");
|
||||
|
||||
/* Install signal handler to collect exit status of child processes */
|
||||
sa.sa_handler = handle_sigchld;
|
||||
sa.sa_handler = die;
|
||||
sa.sa_flags = 0;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sigaddset(&sa.sa_mask, SIGCHLD);
|
||||
(void)sigaction(SIGTERM, &sa, NULL);
|
||||
(void)sigaction(SIGQUIT, &sa, NULL);
|
||||
(void)sigaction(SIGHUP, &sa, NULL);
|
||||
(void)sigaction(SIGINT, &sa, NULL);
|
||||
|
||||
sa.sa_handler = SIG_IGN;
|
||||
sa.sa_flags = SA_RESTART;
|
||||
sigaction(SIGCHLD, &sa, NULL);
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sigaddset(&sa.sa_mask, SIGALRM);
|
||||
|
||||
/* Initialisation now complete - start operating */
|
||||
|
||||
/*
|
||||
* notify_hosts() forks a process (if necessary) to do the
|
||||
* SM_NOTIFY calls, which may be slow.
|
||||
*/
|
||||
notify_hosts();
|
||||
/* Notify hosts that need it */
|
||||
notify_handler(0);
|
||||
|
||||
svc_run(); /* Should never return */
|
||||
exit(1);
|
||||
while (1)
|
||||
svc_run(); /* Should never return */
|
||||
die(0);
|
||||
}
|
||||
|
||||
/* handle_sigchld ---------------------------------------------------------- */
|
||||
/* notify_handler ---------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Catch SIGCHLD and collect process status
|
||||
* Purpose: Catch SIGALRM, notify the hosts that are due, and re-arm the alarm if work remains
|
||||
* Returns: Nothing.
|
||||
* Notes: No special action required, other than to collect the
|
||||
* process status and hence allow the child to die:
|
||||
|
@ -154,19 +175,30 @@ main(argc, argv)
|
|||
* of SM_NOTIFY to other systems, so it is normal for the
|
||||
* children to exit when they have done their work.
|
||||
*/
|
||||
static void
|
||||
handle_sigchld(sig)
|
||||
void
|
||||
notify_handler(sig)
|
||||
int sig;
|
||||
{
|
||||
int pid, status;
|
||||
pid = wait4(-1, &status, WNOHANG, (struct rusage *) 0);
|
||||
if (!pid)
|
||||
syslog(LOG_ERR, "Phantom SIGCHLD??");
|
||||
else if (status)
|
||||
syslog(LOG_ERR, "Child %d failed with status %d", pid,
|
||||
WEXITSTATUS(status));
|
||||
else if (debug)
|
||||
syslog(LOG_DEBUG, "Child %d exited OK", pid);
|
||||
time_t now;
|
||||
|
||||
NO_ALARM;
|
||||
sa.sa_handler = SIG_IGN;
|
||||
(void)sigaction(SIGALRM, &sa, NULL);
|
||||
|
||||
now = time(NULL);
|
||||
|
||||
(void) walk_db(notify_one, &now);
|
||||
|
||||
if (walk_db(check_work, &now) == 0) {
|
||||
/*
|
||||
* No more work to be done.
|
||||
*/
|
||||
CLR_ALARM;
|
||||
return;
|
||||
}
|
||||
sync_file();
|
||||
ALARM;
|
||||
alarm(5);
|
||||
}
|
||||
|
||||
/* sync_file --------------------------------------------------------------- */
|
||||
|
@ -177,72 +209,296 @@ handle_sigchld(sig)
|
|||
void
|
||||
sync_file()
|
||||
{
|
||||
if (msync((caddr_t)status_info, 0, 0) < 0)
|
||||
syslog(LOG_ERR, "msync() failed: %s", strerror(errno));
|
||||
DBT data;
|
||||
|
||||
data.data = &status_info;
|
||||
data.size = sizeof(status_info);
|
||||
switch ((*db->put)(db, &undefkey, &data, 0)) {
|
||||
case 0:
|
||||
return;
|
||||
case -1:
|
||||
goto bad;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
if ((*db->sync)(db, 0) == -1) {
|
||||
bad:
|
||||
syslog(LOG_ERR, "database corrupted %m");
|
||||
die(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* change_host -------------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Update/Create an entry for host
|
||||
* Returns: Nothing
|
||||
* Notes:
|
||||
*
|
||||
*/
|
||||
void
|
||||
change_host(hostname, hp)
|
||||
char *hostname;
|
||||
HostInfo *hp;
|
||||
{
|
||||
DBT key, data;
|
||||
char *ptr;
|
||||
|
||||
for (ptr = hostname; *ptr; ptr++)
|
||||
if (isupper((unsigned char) *ptr))
|
||||
*ptr = tolower((unsigned char) *ptr);
|
||||
|
||||
key.data = hostname;
|
||||
key.size = ptr - hostname + 1;
|
||||
data.data = hp;
|
||||
data.size = sizeof(*hp);
|
||||
|
||||
switch ((*db->put)(db, &key, &data, 0)) {
|
||||
case -1:
|
||||
syslog(LOG_ERR, "database corrupted %m");
|
||||
die(1);
|
||||
case 0:
|
||||
return;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* find_host -------------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Find the entry in the status file for a given host
|
||||
* Returns: Pointer to that entry in the mmap() region, or NULL.
|
||||
* Notes: Also creates entries if requested.
|
||||
* Failure to create also returns NULL.
|
||||
* Returns: Copy of entry in hd, or NULL
|
||||
* Notes:
|
||||
*
|
||||
*/
|
||||
HostInfo *
|
||||
find_host(hostname, create)
|
||||
find_host(hostname, hp)
|
||||
char *hostname;
|
||||
int create;
|
||||
{
|
||||
HostInfo *hp;
|
||||
HostInfo *spare_slot = NULL;
|
||||
HostInfo *result = NULL;
|
||||
int i;
|
||||
{
|
||||
DBT key, data;
|
||||
char *ptr;
|
||||
|
||||
for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts;
|
||||
i++, hp++) {
|
||||
if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN)) {
|
||||
result = hp;
|
||||
for (ptr = hostname; *ptr; ptr++)
|
||||
if (isupper((unsigned char) *ptr))
|
||||
*ptr = tolower((unsigned char) *ptr);
|
||||
|
||||
key.data = hostname;
|
||||
key.size = ptr - hostname + 1;
|
||||
switch ((*db->get)(db, &key, &data, 0)) {
|
||||
case 0:
|
||||
if (data.size != sizeof(*hp))
|
||||
goto bad;
|
||||
return memcpy(hp, data.data, sizeof(*hp));
|
||||
case 1:
|
||||
return NULL;
|
||||
case -1:
|
||||
goto bad;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
bad:
|
||||
syslog(LOG_ERR, "Database corrupted %m");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* walk_one ------------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Call the given function if the element is valid
|
||||
* Returns: 0 for the header entry, otherwise the callback's result; exits on bad data
|
||||
* Notes:
|
||||
*/
|
||||
static int
|
||||
walk_one(fun, key, data, ptr)
|
||||
int (*fun) __P((DBT *, DBT *, void *));
|
||||
DBT *key, *data;
|
||||
void *ptr;
|
||||
{
|
||||
if (key->size == undefkey.size &&
|
||||
memcmp(key->data, undefkey.data, key->size) == 0)
|
||||
return 0;
|
||||
if (data->size != sizeof(HostInfo)) {
|
||||
syslog(LOG_ERR, "Bad data in database");
|
||||
die(1);
|
||||
}
|
||||
|
||||
return (*fun)(key, data, ptr);
|
||||
}
|
||||
|
||||
/* walk_db -------------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Iterate over all elements calling the given function
|
||||
* Returns: -1 if function failed, 0 on success
|
||||
* Notes:
|
||||
*/
|
||||
static int
|
||||
walk_db(fun, ptr)
|
||||
int (*fun) __P((DBT *, DBT *, void *));
|
||||
void *ptr;
|
||||
{
|
||||
DBT key, data;
|
||||
|
||||
switch ((*db->seq)(db, &key, &data, R_FIRST)) {
|
||||
case -1:
|
||||
goto bad;
|
||||
case 1:
|
||||
/* We should have at least the magic entry at this point */
|
||||
abort();
|
||||
case 0:
|
||||
if (walk_one(fun, &key, &data, ptr) == -1)
|
||||
return -1;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
|
||||
for (;;)
|
||||
switch ((*db->seq)(db, &key, &data, R_NEXT)) {
|
||||
case -1:
|
||||
goto bad;
|
||||
case 1:
|
||||
if (walk_one(fun, &key, &data, ptr) == -1)
|
||||
return -1;
|
||||
break;
|
||||
case 0:
|
||||
return 0;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
if (!spare_slot && !hp->monList && !hp->notifyReqd)
|
||||
spare_slot = hp;
|
||||
bad:
|
||||
syslog(LOG_ERR, "Corrupted database %m");
|
||||
die(1);
|
||||
}
|
||||
|
||||
/* reset_host ------------------------------------------------------------ */
|
||||
/*
|
||||
* Purpose: Clean up existing hosts in file.
|
||||
* Returns: Always success 0.
|
||||
* Notes: Clean-up of existing file - monitored hosts will have a
|
||||
* pointer to a list of clients, which refers to memory in
|
||||
* the previous incarnation of the program and so are
|
||||
* meaningless now. These pointers are zeroed and the fact
|
||||
* that the host was previously monitored is recorded by
|
||||
* setting the notifyReqd flag, which will in due course
|
||||
* cause a SM_NOTIFY to be sent.
|
||||
*
|
||||
* Note that if we crash twice in quick succession, some hosts
|
||||
* may already have notifyReqd set, where we didn't manage to
|
||||
* notify them before the second crash occurred.
|
||||
*/
|
||||
static int
|
||||
reset_host(key, data, ptr)
|
||||
DBT *key, *data;
|
||||
void *ptr;
|
||||
{
|
||||
HostInfo *hi = data->data;
|
||||
|
||||
if (hi->monList) {
|
||||
hi->notifyReqd = *(time_t *) data;
|
||||
hi->attempts = 0;
|
||||
hi->monList = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return if entry found, or if not asked to create one. */
|
||||
if (result || !create)
|
||||
return (result);
|
||||
/* check_work ------------------------------------------------------------ */
|
||||
/*
|
||||
* Purpose: Check if there is work to be done.
|
||||
* Returns: 0 if there is no work to be done -1 if there is.
|
||||
* Notes:
|
||||
*/
|
||||
static int
|
||||
check_work(key, data, ptr)
|
||||
DBT *key, *data;
|
||||
void *ptr;
|
||||
{
|
||||
HostInfo *hi = data->data;
|
||||
|
||||
/*
|
||||
* Now create an entry, using the spare slot if one was found or
|
||||
* adding to the end of the list otherwise, extending file if req'd
|
||||
*/
|
||||
if (!spare_slot) {
|
||||
off_t desired_size;
|
||||
spare_slot = &status_info->hosts[status_info->noOfHosts];
|
||||
desired_size = ((char *)spare_slot - (char *)status_info) +
|
||||
sizeof(HostInfo);
|
||||
return hi->notifyReqd ? -1 : 0;
|
||||
}
|
||||
|
||||
if (desired_size > status_file_len) {
|
||||
/* Extend file by writing 1 byte of junk at the
|
||||
* desired end pos */
|
||||
lseek(status_fd, desired_size - 1, SEEK_SET);
|
||||
i = write(status_fd, &i, 1);
|
||||
if (i < 1) {
|
||||
syslog(LOG_ERR, "Unable to extend status file");
|
||||
return (NULL);
|
||||
}
|
||||
status_file_len = desired_size;
|
||||
/* unmon_host ------------------------------------------------------------ */
|
||||
/*
|
||||
* Purpose: Unmonitor a host
|
||||
* Returns: 0
|
||||
* Notes:
|
||||
*/
|
||||
static int
|
||||
unmon_host(key, data, ptr)
|
||||
DBT *key, *data;
|
||||
void *ptr;
|
||||
{
|
||||
char *name = key->data;
|
||||
HostInfo *hi = data->data;
|
||||
|
||||
if (do_unmon(name, hi, ptr))
|
||||
change_host(name, hi);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* notify_one ------------------------------------------------------------ */
|
||||
/*
|
||||
* Purpose: Notify one host.
|
||||
* Returns: 0 if success -1 on failure
|
||||
* Notes:
|
||||
*/
|
||||
static int
|
||||
notify_one(key, data, ptr)
|
||||
DBT *key, *data;
|
||||
void *ptr;
|
||||
{
|
||||
time_t now = *(time_t *) ptr;
|
||||
char *name = key->data;
|
||||
HostInfo *hi = data->data;
|
||||
|
||||
if (hi->notifyReqd == 0 || hi->notifyReqd > now)
|
||||
return 0;
|
||||
|
||||
if (notify_one_host(name)) {
|
||||
give_up:
|
||||
hi->notifyReqd = 0;
|
||||
hi->attempts = 0;
|
||||
switch ((*db->put)(db, key, data, 0)) {
|
||||
case -1:
|
||||
syslog(LOG_ERR, "Error storing %s (%m)", name);
|
||||
case 0:
|
||||
return 0;
|
||||
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
status_info->noOfHosts++;
|
||||
}
|
||||
/*
|
||||
* Initialise the spare slot that has been found/created
|
||||
* Note that we do not msync(), since the caller is presumed to be
|
||||
* about to modify the entry further
|
||||
*/
|
||||
memset(spare_slot, 0, sizeof(HostInfo));
|
||||
strncpy(spare_slot->hostname, hostname, SM_MAXSTRLEN);
|
||||
return (spare_slot);
|
||||
else {
|
||||
/*
|
||||
* If one of the initial attempts fails, we wait
|
||||
* for a while and have another go. This is necessary
|
||||
* because when we have crashed, (eg. a power outage)
|
||||
* it is quite possible that we won't be able to
|
||||
* contact all monitored hosts immediately on restart,
|
||||
* either because they crashed too and take longer
|
||||
* to come up (in which case the notification isn't
|
||||
* really required), or more importantly if some
|
||||
* router etc. needed to reach the monitored host
|
||||
* has not come back up yet. In this case, we will
|
||||
* be a bit late in re-establishing locks (after the
|
||||
* grace period) but that is the best we can do. We
|
||||
* try 10 times at 5 sec intervals, 10 more times at
|
||||
* 1 minute intervals, then 24 more times at hourly
|
||||
* intervals, finally giving up altogether if the
|
||||
* host hasn't come back to life after 24 hours.
|
||||
*/
|
||||
if (hi->attempts++ >= 44)
|
||||
goto give_up;
|
||||
else if (hi->attempts < 10)
|
||||
hi->notifyReqd += 5;
|
||||
else if (hi->attempts < 20)
|
||||
hi->notifyReqd += 60;
|
||||
else
|
||||
hi->notifyReqd += 60 * 60;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* init_file -------------------------------------------------------------- */
|
||||
|
@ -257,89 +513,73 @@ find_host(hostname, create)
|
|||
* all hosts that had a monitor list, and incrementing
|
||||
* the state number to the next even value.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
init_file(filename)
|
||||
char *filename;
|
||||
{
|
||||
char buf[HEADER_LEN];
|
||||
int new_file = FALSE;
|
||||
int i;
|
||||
DBT data;
|
||||
|
||||
/* try to open existing file - if not present, create one */
|
||||
status_fd = open(filename, O_RDWR);
|
||||
if ((status_fd < 0) && (errno == ENOENT)) {
|
||||
status_fd = open(filename, O_RDWR | O_CREAT, 0644);
|
||||
new_file = TRUE;
|
||||
}
|
||||
if (status_fd < 0) {
|
||||
err(1, "unable to open status file %s", filename);
|
||||
/* NOTREACHED */
|
||||
db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 644, DB_HASH,
|
||||
NULL);
|
||||
if (db == NULL)
|
||||
err(1, "Cannot open `%s'", filename);
|
||||
|
||||
switch ((*db->get)(db, &undefkey, &data, 0)) {
|
||||
case 1:
|
||||
/* New database */
|
||||
(void)memset(&status_info, 0, sizeof(status_info));
|
||||
sync_file();
|
||||
return;
|
||||
|
||||
case -1:
|
||||
err(1, "error accessing database (%m)");
|
||||
case 0:
|
||||
/* Existing database */
|
||||
if (data.size != sizeof(status_info))
|
||||
errx(1, "database corrupted %d != %d",
|
||||
data.size, sizeof(status_info));
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
/*
|
||||
* File now open. mmap() it, with a generous size to allow for
|
||||
* later growth, where we will extend the file but not re-map it.
|
||||
*/
|
||||
status_info = (FileLayout *)mmap(NULL, 0x1000000,
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED, status_fd, 0);
|
||||
|
||||
if (status_info == (FileLayout *)(-1)) {
|
||||
perror("rpc.statd");
|
||||
fprintf(stderr, "Unable to mmap() status file\n");
|
||||
}
|
||||
status_file_len = lseek(status_fd, 0L, SEEK_END);
|
||||
|
||||
/*
|
||||
* If the file was not newly created, validate the contents, and if
|
||||
* defective, re-create from scratch.
|
||||
*/
|
||||
if (!new_file) {
|
||||
if ((status_file_len < HEADER_LEN) || (status_file_len <
|
||||
(HEADER_LEN + sizeof(HostInfo) * status_info->noOfHosts))) {
|
||||
fprintf(stderr, "rpc.statd: status file is corrupt\n");
|
||||
new_file = TRUE;
|
||||
}
|
||||
}
|
||||
/* Initialisation of a new, empty file. */
|
||||
if (new_file) {
|
||||
memset(buf, 0, sizeof(buf));
|
||||
lseek(status_fd, 0L, SEEK_SET);
|
||||
write(status_fd, buf, HEADER_LEN);
|
||||
status_file_len = HEADER_LEN;
|
||||
} else {
|
||||
/*
|
||||
* Clean-up of existing file - monitored hosts will have a
|
||||
* pointer to a list of clients, which refers to memory in
|
||||
* the previous incarnation of the program and so are
|
||||
* meaningless now. These pointers are zeroed and the fact
|
||||
* that the host was previously monitored is recorded by
|
||||
* setting the notifyReqd flag, which will in due course
|
||||
* cause a SM_NOTIFY to be sent.
|
||||
*
|
||||
* Note that if we crash twice in quick succession, some hosts
|
||||
* may already have notifyReqd set, where we didn't manage to
|
||||
* notify them before the second crash occurred.
|
||||
*/
|
||||
for (i = 0; i < status_info->noOfHosts; i++) {
|
||||
HostInfo *this_host = &status_info->hosts[i];
|
||||
|
||||
if (this_host->monList) {
|
||||
this_host->notifyReqd = TRUE;
|
||||
this_host->monList = NULL;
|
||||
}
|
||||
}
|
||||
/* Select the next higher even number for the state counter */
|
||||
status_info->ourState =
|
||||
(status_info->ourState + 2) & 0xfffffffe;
|
||||
status_info->ourState++; /* XXX - ??? */
|
||||
}
|
||||
reset_database();
|
||||
return;
|
||||
}
|
||||
|
||||
/* notify_one_host --------------------------------------------------------- */
|
||||
/* reset_database --------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Perform SM_NOTIFY procedure at specified host
|
||||
* Returns: TRUE if success, FALSE if failed.
|
||||
* Purpose: Clears the statd database
|
||||
* Returns: Nothing
|
||||
* Notes: If this is not called on reset, it will leak memory.
|
||||
*/
|
||||
void
|
||||
reset_database()
|
||||
{
|
||||
time_t now = time(NULL);
|
||||
walk_db(reset_host, &now);
|
||||
|
||||
/* Select the next higher even number for the state counter */
|
||||
status_info.ourState =
|
||||
(status_info.ourState + 2) & 0xfffffffe;
|
||||
status_info.ourState++; /* XXX - ??? */
|
||||
sync_file();
|
||||
}
|
||||
|
||||
/* unmon_hosts --------------------------------------------------------- */
|
||||
/*
|
||||
* Purpose: Unmonitor all the hosts
|
||||
* Returns: Nothing
|
||||
* Notes:
|
||||
*/
|
||||
/*
 * Walks the whole database with unmon_host() and then writes the header.
 *
 * NOTE(review): the pointer handed to walk_db() here is &now (a time_t).
 * unmon_host() forwards that pointer unchanged to do_unmon(), which reads
 * it as a my_id *.  The caller's my_id is not passed to this function, so
 * the match in do_unmon() is made against the wrong data.  Fixing this
 * needs the my_id to be passed in, which changes this function's
 * interface; confirm with the callers.
 */
void
|
||||
unmon_hosts()
|
||||
{
|
||||
time_t now = time(NULL);
|
||||
walk_db(unmon_host, &now);
|
||||
sync_file();
|
||||
}
|
||||
|
||||
static int
|
||||
notify_one_host(hostname)
|
||||
char *hostname;
|
||||
|
@ -353,7 +593,7 @@ notify_one_host(hostname)
|
|||
gethostname(our_hostname, sizeof(our_hostname));
|
||||
our_hostname[SM_MAXSTRLEN] = '\0';
|
||||
arg.mon_name = our_hostname;
|
||||
arg.state = status_info->ourState;
|
||||
arg.state = status_info.ourState;
|
||||
|
||||
if (debug)
|
||||
syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s",
|
||||
|
@ -376,81 +616,11 @@ notify_one_host(hostname)
|
|||
return (TRUE);
|
||||
}
|
||||
|
||||
/* notify_hosts ------------------------------------------------------------ */
|
||||
/*
|
||||
* Purpose: Send SM_NOTIFY to all hosts marked as requiring it
|
||||
* Returns: Nothing, immediately - forks a process to do the work.
|
||||
* Notes: Does nothing if there are no monitored hosts.
|
||||
* Called after all the initialisation has been done -
|
||||
* logs to syslog.
|
||||
*/
|
||||
void
|
||||
notify_hosts(void)
|
||||
|
||||
static void
|
||||
die(n)
|
||||
int n;
|
||||
{
|
||||
HostInfo *hp;
|
||||
int i, attempts;
|
||||
int work_to_do = FALSE;
|
||||
pid_t pid;
|
||||
|
||||
/* First check if there is in fact any work to do. */
|
||||
for (i = status_info->noOfHosts, hp = status_info->hosts; i;
|
||||
i--, hp++) {
|
||||
if (hp->notifyReqd) {
|
||||
work_to_do = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!work_to_do)
|
||||
return; /* No work found */
|
||||
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
syslog(LOG_ERR, "Unable to fork notify process - %s",
|
||||
strerror(errno));
|
||||
return;
|
||||
}
|
||||
if (pid)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Here in the child process. We continue until all the hosts marked
|
||||
* as requiring notification have been duly notified.
|
||||
* If one of the initial attempts fails, we sleep for a while and
|
||||
* have another go. This is necessary because when we have crashed,
|
||||
* (eg. a power outage) it is quite possible that we won't be able to
|
||||
* contact all monitored hosts immediately on restart, either because
|
||||
* they crashed too and take longer to come up (in which case the
|
||||
* notification isn't really required), or more importantly if some
|
||||
* router etc. needed to reach the monitored host has not come back
|
||||
* up yet. In this case, we will be a bit late in re-establishing
|
||||
* locks (after the grace period) but that is the best we can do.
|
||||
* We try 10 times at 5 sec intervals, 10 more times at 1 minute
|
||||
* intervals, then 24 more times at hourly intervals, finally
|
||||
* giving up altogether if the host hasn't come back to life after
|
||||
* 24 hours.
|
||||
*/
|
||||
for (attempts = 0; attempts < 44; attempts++) {
|
||||
work_to_do = FALSE; /* Unless anything fails */
|
||||
for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
|
||||
i--, hp++) {
|
||||
if (hp->notifyReqd) {
|
||||
if (notify_one_host(hp->hostname)) {
|
||||
hp->notifyReqd = FALSE;
|
||||
sync_file();
|
||||
} else
|
||||
work_to_do = TRUE;
|
||||
}
|
||||
}
|
||||
if (!work_to_do)
|
||||
break;
|
||||
if (attempts < 10)
|
||||
sleep(5);
|
||||
else
|
||||
if (attempts < 20)
|
||||
sleep(60);
|
||||
else
|
||||
sleep(60 * 60);
|
||||
}
|
||||
exit(0);
|
||||
(*db->close)(db);
|
||||
exit(n);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: statd.h,v 1.1 1997/03/10 06:28:32 scottr Exp $ */
|
||||
/* $NetBSD: statd.h,v 1.2 1997/10/21 20:38:19 christos Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995
|
||||
|
@ -52,7 +52,7 @@
|
|||
*
|
||||
* We handle this by keeping the list of monitored hosts in a file
|
||||
* (/var/statd.state) which is mmap()ed and whose format is described
|
||||
* by the typedef FileLayout. The lists of client callbacks are chained
|
||||
* by the typedef Header. The lists of client callbacks are chained
|
||||
* off this structure, but are held in normal memory and so will be
|
||||
* lost after a re-boot. Hence the actual values of MonList * pointers
|
||||
* in the copy on disc have no significance, but their NULL/non-NULL
|
||||
|
@ -70,9 +70,9 @@ typedef struct MonList_s {
|
|||
} MonList;
|
||||
|
||||
typedef struct {
|
||||
char hostname[SM_MAXSTRLEN + 1]; /* Name of monitored host */
|
||||
int notifyReqd; /* TRUE if we've crashed and not yet
|
||||
int notifyReqd; /* Time of our next attempt or 0
|
||||
informed the monitored host */
|
||||
int attempts; /* Number of attempts we tried so far */
|
||||
MonList *monList; /* List of clients to inform if we
|
||||
hear that the monitored host has
|
||||
crashed, NULL if no longer monitored */
|
||||
|
@ -82,23 +82,39 @@ typedef struct {
|
|||
/* Overall file layout. */
|
||||
|
||||
typedef struct {
|
||||
int magic; /* Zero magic */
|
||||
int ourState; /* State number as defined in statd protocol */
|
||||
int noOfHosts; /* Number of elements in hosts[] */
|
||||
char reserved[248]; /* Reserved for future use */
|
||||
HostInfo hosts[1]; /* vector of monitored hosts */
|
||||
} FileLayout;
|
||||
#define HEADER_LEN (sizeof(FileLayout) - sizeof(HostInfo))
|
||||
} Header;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Global variables */
|
||||
|
||||
extern FileLayout *status_info; /* The mmap()ed status file */
|
||||
extern int debug; /* = 1 to enable diagnostics to syslog */
|
||||
extern struct sigaction sa;
|
||||
extern Header status_info;
|
||||
|
||||
/* Function prototypes */
|
||||
|
||||
extern HostInfo *find_host __P((char *hostname, int create));
|
||||
extern void init_file __P((char *filename));
|
||||
extern void notify_hosts __P((void));
|
||||
extern void sync_file __P((void));
|
||||
/* stat_proc.c */
|
||||
struct sm_stat_res *sm_stat_1_svc __P((sm_name *, struct svc_req *));
|
||||
struct sm_stat_res *sm_mon_1_svc __P((mon *, struct svc_req *));
|
||||
struct sm_stat *sm_unmon_1_svc __P((mon_id *, struct svc_req *));
|
||||
struct sm_stat *sm_unmon_all_1_svc __P((my_id *, struct svc_req *));
|
||||
void *sm_simu_crash_1_svc __P((void *, struct svc_req *));
|
||||
void *sm_notify_1_svc __P((stat_chge *, struct svc_req *));
|
||||
int do_unmon __P((char *, HostInfo *, void *));
|
||||
|
||||
/* statd.c */
|
||||
void notify_handler __P((int));
|
||||
void sync_file __P((void));
|
||||
void unmon_hosts __P((void));
|
||||
void change_host __P((char *, HostInfo *));
|
||||
HostInfo *find_host __P((char *, HostInfo *));
|
||||
void reset_database __P((void));
|
||||
|
||||
void sm_prog_1 __P((struct svc_req *, SVCXPRT *));
|
||||
|
||||
/*
 * NO_ALARM: switch the SIGALRM disposition to "ignore".
 *
 * If sa.sa_handler is SIG_DFL this does nothing and evaluates to 0.
 * Otherwise it stores SIG_IGN in the global `sa` and evaluates to the
 * result of sigaction(SIGALRM, &sa, NULL).
 *
 * The whole expansion is parenthesized so that it keeps its meaning when
 * used inside a larger expression, not only as a bare statement.
 */
#define NO_ALARM \
	(sa.sa_handler == SIG_DFL ? 0 : \
	    (sa.sa_handler = SIG_IGN, sigaction(SIGALRM, &sa, NULL)))
|
||||
/*
 * ALARM: (re)install notify_handler as the SIGALRM handler.
 *
 * If sa.sa_handler is SIG_DFL this does nothing and evaluates to 0.
 * Otherwise it stores notify_handler in the global `sa` and evaluates to
 * the result of sigaction(SIGALRM, &sa, NULL).
 *
 * The whole expansion is parenthesized so that it keeps its meaning when
 * used inside a larger expression, not only as a bare statement.
 */
#define ALARM \
	(sa.sa_handler == SIG_DFL ? 0 : \
	    (sa.sa_handler = notify_handler, sigaction(SIGALRM, &sa, NULL)))
|
||||
/*
 * CLR_ALARM: restore the default SIGALRM disposition.
 *
 * If sa.sa_handler is already SIG_DFL this does nothing and evaluates to 0.
 * Otherwise it stores SIG_DFL in the global `sa` and evaluates to the
 * result of sigaction(SIGALRM, &sa, NULL).  Once sa.sa_handler is SIG_DFL,
 * NO_ALARM, ALARM and CLR_ALARM all take their do-nothing branch.
 *
 * The whole expansion is parenthesized so that it keeps its meaning when
 * used inside a larger expression, not only as a bare statement.
 */
#define CLR_ALARM \
	(sa.sa_handler == SIG_DFL ? 0 : \
	    (sa.sa_handler = SIG_DFL, sigaction(SIGALRM, &sa, NULL)))
|
||||
|
|
Loading…
Reference in New Issue