#!/bin/sh
#
# plugin for munin to monitor usage of NSD.
#
# (C) 2008 W.C.A. Wijngaards. BSD Licensed.
#
# To install; compile with --enable-bind8-stats (enabled by default)
# and enable nsd-control in nsd.conf with the line
# remote-control: control-enable: yes
# Run the command nsd-control-setup as root to generate the key files.
#
# Environment variables for this script
# statefile - where to put temporary statefile.
# nsd_conf - where the nsd.conf file is located.
# nsd_control - where to find nsd-control executable.
# nsd_checkconf - where to find nsd-checkconf executable.
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [nsd_munin*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/nsd-state
# env.nsd_conf /usr/local/etc/nsd.conf
# env.nsd_control /usr/local/sbin/nsd-control
# env.nsd_checkconf /usr/local/sbin/nsd-checkconf
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
# nsd_munin_hits - base volume, transport type, failures
# nsd_munin_memory - memory usage
# nsd_munin_by_type - incoming queries by type
# nsd_munin_by_class - incoming queries by class
# nsd_munin_by_opcode - incoming queries by opcode
# nsd_munin_by_rcode - answers by rcode
# nsd_munin_zones - number of zones
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=contrib
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut
=head1 NAME

nsd_munin_ - Munin plugin to monitor the NSD server.

=head1 APPLICABLE SYSTEMS

System with NSD daemon.

=head1 CONFIGURATION

  [nsd_munin*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/nsd-state
  env.nsd_conf /usr/local/etc/nsd.conf
  env.nsd_control /usr/local/sbin/nsd-control
  env.nsd_checkconf /usr/local/sbin/nsd-checkconf

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.

  nsd_munin_hits - base volume, transport type, failures
  nsd_munin_memory - memory usage
  nsd_munin_by_type - incoming queries by type
  nsd_munin_by_class - incoming queries by class
  nsd_munin_by_opcode - incoming queries by opcode
  nsd_munin_by_rcode - answers by rcode
  nsd_munin_zones - number of zones

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

# Runtime settings.  Each path falls back to the default shown here and
# can be overridden through the environment variable named inside ${...}
# (set with env.* in the munin plugin configuration).
state=${statefile:-/usr/local/var/munin/plugin-state/nsd-state}
conf=${nsd_conf:-/usr/local/etc/nsd.conf}
ctrl=${nsd_control:-/usr/local/sbin/nsd-control}
chkconf=${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}
# lock file that serializes fetches into the statefile
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
# (sed arguments; expanded unquoted on purpose so they split into words)
ABBREV="-e s/num/n/ -e s/type/t/ -e s/opcode/o/ -e s/rcode/r/ -e s/class/c/"

# get value from $1 into return variable $value
#
# Looks up the line "<key>=<value>" for key $1 in the statefile ($state)
# and stores the text after "<key>=" in the global variable $value.
# $value becomes "0" when the key is missing or its value is empty.
# The key is compared literally (the dots in NSD counter names are not
# regex wildcards) and only the first matching line is used.
get_value ( ) {
	value=$(awk -v key="$1=" '
		index($0, key) == 1 { print substr($0, length(key) + 1); exit }
	' "$state")
	if test -z "$value"; then
		value="0"
	fi
}

# download the state from NSD.
#
# Refreshes the statefile ($state) with the output of
# "$ctrl -c $conf stats" followed by a "timestamp=<epoch seconds>" line.
# Fetching is serialized through the lock file $lock, which holds the
# pid of the owner.  If the statefile is younger than $lee seconds it
# is reused and nothing is fetched.
# Exits the script with status 1 when the lock cannot be obtained or
# when nsd-control fails; the lock is removed on every path out.
get_state ( ) {
	# obtain lock for fetching the state
	# because there is a race condition in fetching and writing to file

	# see if the lock is stale, if so, take it
	if test -f "$lock"; then
		pid=$(cat "$lock" 2>&1)
		if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
			echo $$ >"$lock"
		fi
	fi

	i=0
	while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
		while test -f "$lock"; do
			# wait: spin for the first 1000 rounds, then poll once
			# per second, and give up after 1500 rounds in total
			i=$((i + 1))
			if test "$i" -gt 1000; then
				sleep 1
			fi
			if test "$i" -gt 1500; then
				echo "error locking $lock" "=" "$(cat "$lock")"
				rm -f "$lock"
				exit 1
			fi
		done
		# try to get it
		echo $$ >"$lock"
	done

	# do not refetch if the file exists and only LEE seconds old
	if test -f "$state"; then
		now=$(date +%s)
		get_value "timestamp"
		if test "$now" -lt "$((value + lee))"; then
			rm -f "$lock"
			return
		fi
	fi

	# fetch into a scratch file and rename it into place, so that a
	# failing nsd-control never truncates the statefile that other
	# links of this plugin may still be reading
	newstate="$state.new.$$"
	if ! "$ctrl" -c "$conf" stats >"$newstate"; then
		echo "error retrieving data from the server"
		rm -f "$newstate" "$lock"
		exit 1
	fi
	echo "timestamp=$(date +%s)" >>"$newstate"
	mv -f "$newstate" "$state"
	rm -f "$lock"
}

# "autoconf" request: tell munin-node-configure whether this plugin can
# run here.  Prints "yes", or "no (reason)" when nsd.conf is missing or
# the statefile directory does not exist and cannot be created.
# The answer is carried by the printed text; the exit status is 0 in
# every case, following Munin's autoconf convention (originally 1 on "no").
if test "$1" = "autoconf" ; then
	if test ! -f "$conf"; then
		echo no "($conf does not exist)"
		exit 0
	fi
	statedir=$(dirname "$state")
	if test ! -d "$statedir"; then
		# try to create it; the check below reports a failure
		mkdir -p "$statedir"
		if test ! -d "$statedir"; then
			echo no "($state directory does not exist)"
			exit 0
		fi
	fi
	echo yes
	exit 0
fi

# "suggest" request: list the graph names this plugin can be linked as
# (the part after "nsd_munin_"), one per line.
if test "$1" = "suggest" ; then
	printf '%s\n' \
		"hits" \
		"memory" \
		"by_type" \
		"by_class" \
		"by_opcode" \
		"by_rcode" \
		"zones"
	exit 0
fi

# determine my type, by name
# $id is whatever follows the last "nsd_munin_" in the name this script
# was invoked as ($0 itself when the name does not contain that marker).
id=${0##*nsd_munin_}
if test -z "$id"; then
	# some default to keep people sane.
	id="hits"
fi

# if $1 exists in statefile, config is echoed with label $2
#
# $1 - counter name as it appears in the statefile (before the "=")
# $2 - label for the munin field
# The field name is $1 shortened with $ABBREV and with dots turned into
# underscores.  Prints the label, "min 0" and "type ABSOLUTE" lines, or
# nothing when the statefile has no line for $1 (or cannot be read).
# The name is matched literally, not as a regular expression.
exist_config ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	if awk -v key="$1=" '
		index($0, key) == 1 { found = 1; exit }
		END { exit !found }
	' "$state" 2>/dev/null; then
		echo "$mn.label $2"
		echo "$mn.min 0"
		echo "$mn.type ABSOLUTE"
	fi
}

# print label and min 0 for a name $1 in nsd format
#
# $1 - counter name in NSD notation (dotted)
# $2 - label for the munin field
# $3 - munin data source type, printed after ".type"
# The field name is $1 shortened with $ABBREV and with dots turned into
# underscores.  Arguments are printed verbatim (no word splitting or
# filename expansion).
p_config ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	printf '%s.label %s\n' "$mn" "$2"
	printf '%s.min 0\n' "$mn"
	printf '%s.type %s\n' "$mn" "$3"
}

# Print the header lines shared by the per-period counter graphs.
# $1 - graph title
# $2 - unit shown on the vertical axis before "/ ${graph_period}"
config_rate_header ( ) {
	echo "graph_title $1"
	echo "graph_args --base 1000 -l 0"
	echo "graph_vlabel $2 / \${graph_period}"
	echo "graph_scale no"
	echo "graph_category DNS"
}

# Declare one ABSOLUTE field (through p_config) for every statefile
# counter whose name starts with the literal prefix $1, for example
# "num.type."; the label is the counter name without that prefix.
config_counter_family ( ) {
	test -r "$state" || return 0
	while IFS= read -r line; do
		case $line in
		"$1"*=*)
			nm=${line%%=*}
			p_config "$nm" "${nm#"$1"}" "ABSOLUTE"
			;;
		esac
	done <"$state"
}

# "config" request: describe the graph selected by $id to munin.
# The statefile is fetched first when it does not exist yet, because
# several graphs derive their field list from the counters present in
# it.  Always exits 0; an unknown $id prints nothing.
if test "$1" = "config" ; then
	if test ! -f "$state"; then
		get_state
	fi
	case "$id" in
	hits)
		config_rate_header "NSD traffic" "queries"
		# per-server counters are only declared when NSD reports them
		n=0
		while test "$n" -le 15; do
			exist_config "server$n.queries" "queries handled by server$n"
			n=$((n + 1))
		done
		p_config "num.queries" "total queries" "ABSOLUTE"
		p_config "num.udp" "UDP ip4 queries" "ABSOLUTE"
		p_config "num.udp6" "UDP ip6 queries" "ABSOLUTE"
		p_config "num.tcp" "TCP ip4 queries" "ABSOLUTE"
		p_config "num.tcp6" "TCP ip6 queries" "ABSOLUTE"
		p_config "num.edns" "queries with EDNS OPT" "ABSOLUTE"
		p_config "num.ednserr" "queries failed EDNS parse" "ABSOLUTE"
		p_config "num.answer_wo_aa" "nonauthor. queries (referrals)" "ABSOLUTE"
		p_config "num.rxerr" "receive failed" "ABSOLUTE"
		p_config "num.txerr" "transmit failed" "ABSOLUTE"
		p_config "num.truncated" "truncated replies with TC" "ABSOLUTE"
		p_config "num.raxfr" "AXFR from allowed client" "ABSOLUTE"
		p_config "num.rixfr" "IXFR from allowed client" "ABSOLUTE"
		p_config "num.dropped" "dropped due to sanity check" "ABSOLUTE"
		echo "graph_info DNS queries."
		;;
	memory)
		echo "graph_title NSD memory usage"
		echo "graph_args --base 1024 -l 0"
		echo "graph_vlabel memory used in bytes"
		echo "graph_category DNS"
		p_config "size.vsz" "Total virtual memory (VSZ)" "GAUGE"
		p_config "size.rss" "Total resident memory (RSS)" "GAUGE"
		p_config "size.db.mem" "data in memory" "GAUGE"
		p_config "size.xfrd.mem" "xfr and notify memory" "GAUGE"
		p_config "size.config.mem" "config memory" "GAUGE"
		p_config "size.db.disk" "mmap of nsd.db file" "GAUGE"
		p_config "size.config.disk" "config zonelist on disk" "GAUGE"
		echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
		;;
	by_type)
		config_rate_header "NSD queries by type" "queries"
		config_counter_family "num.type."
		echo "graph_info queries by DNS RR type queried for"
		;;
	by_class)
		config_rate_header "NSD queries by class" "queries"
		config_counter_family "num.class."
		echo "graph_info queries by DNS RR class queried for."
		;;
	by_opcode)
		config_rate_header "NSD queries by opcode" "queries"
		config_counter_family "num.opcode."
		echo "graph_info queries by opcode in the query packet."
		;;
	by_rcode)
		config_rate_header "NSD answers by return code" "answer packets"
		config_counter_family "num.rcode."
		echo "graph_info answers split out by return value."
		;;
	zones)
		echo "graph_title NSD number of zones"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel zone count"
		echo "graph_category DNS"
		p_config "zone.total" "total zones" "GAUGE"
		p_config "zone.master" "master zones" "GAUGE"
		p_config "zone.slave" "slave zones" "GAUGE"
		echo "graph_info number of zones served by NSD."
		;;
	esac

	exit 0
fi

# do the stats itself
# (only reached for a plain run: the autoconf, suggest and config
# requests above have already exited)
get_state

# get the time elapsed
# get_value yields "0" when the counter is absent, so a zero here also
# covers the case that no usable data was retrieved.
get_value "time.elapsed"
if test "$value" = 0 || test "$value" = "0.000000"; then
	echo "error: time elapsed 0 or could not retrieve data"
	exit 1
fi
elapsed="$value"

# print value for $1
#
# $1 - counter name in NSD notation (dotted)
# Prints "<field>.value <value>", where <field> is $1 shortened with
# $ABBREV and with dots turned into underscores, and <value> is what
# get_value finds for $1 in the statefile.
print_value ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	get_value "$1"
	printf '%s.value %s\n' "$mn" "$value"
}

# print value if line already found in $2
#
# $1 - counter name in NSD notation (dotted)
# $2 - the complete "name=value" line taken from the statefile
# Prints "<field>.value <value>", where <field> is $1 shortened with
# $ABBREV and with dots turned into underscores, and <value> is the
# text after the last "=" in $2.  Also leaves that text in $value.
print_value_line ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	value=${2##*=}
	printf '%s.value %s\n' "$mn" "$value"
}

# Succeeds when the statefile has a line for the literal counter name $1.
state_has_key ( ) {
	awk -v key="$1=" '
		index($0, key) == 1 { found = 1; exit }
		END { exit !found }
	' "$state" 2>/dev/null
}

# Print a value line (through print_value_line) for every statefile
# counter whose name starts with the literal prefix $1.
print_counter_family ( ) {
	test -r "$state" || return 0
	while IFS= read -r line; do
		case $line in
		"$1"*=*)
			print_value_line "${line%%=*}" "$line"
			;;
		esac
	done <"$state"
}

# Print $1 multiplied by 1024, or 0 when $1 is not a plain non-negative
# integer once surrounding whitespace is removed.  Used on `ps` output,
# which is an error message instead of a number when the process is gone.
kib_to_bytes ( ) {
	kib=$(printf '%s\n' "$1" |
		sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
	case $kib in
	''|*[!0-9]*)
		echo 0
		;;
	*)
		echo $((kib * 1024))
		;;
	esac
}

# Print the values for the graph selected by $id.
case "$id" in
hits)
	# per-server counters first, then the totals; a counter that NSD
	# did not report is left out
	n=0
	while test "$n" -le 15; do
		if state_has_key "server$n.queries"; then
			print_value "server$n.queries"
		fi
		n=$((n + 1))
	done
	for x in num.queries num.udp num.udp6 num.tcp num.tcp6 \
		num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr \
		num.truncated num.raxfr num.rixfr num.dropped ; do
		if state_has_key "$x"; then
			print_value "$x"
		fi
	done
	;;
memory)
	# get the total memory for NSD
	serverpid=$("$ctrl" -c "$conf" serverpid 2>&1)
	# small race condition, if reload happens between previous and next
	# lines, if so, detect by checking if we have a number as output.
	rssval=$(ps -p "$serverpid" -o rss= 2>&1)
	vszval=$(ps -p "$serverpid" -o vsz= 2>&1)
	echo "size_vsz.value" "$(kib_to_bytes "$vszval")"
	echo "size_rss.value" "$(kib_to_bytes "$rssval")"
	for x in size.db.mem size.xfrd.mem size.config.mem \
		size.db.disk size.config.disk; do
		print_value "$x"
	done
	;;
by_type)
	print_counter_family "num.type."
	;;
by_class)
	print_counter_family "num.class."
	;;
by_opcode)
	print_counter_family "num.opcode."
	;;
by_rcode)
	print_counter_family "num.rcode."
	;;
zones)
	# the total is computed here as master + slave
	get_value "zone.master"
	nummas="$value"
	get_value "zone.slave"
	numsla="$value"
	echo "zone_total.value" "$((nummas + numsla))"
	echo "zone_master.value" "$nummas"
	echo "zone_slave.value" "$numsla"
	;;
esac