423 lines
12 KiB
Plaintext
423 lines
12 KiB
Plaintext
|
#!/bin/ksh
|
||
|
#
|
||
|
# iotop - display top disk I/O events by process.
|
||
|
# Written using DTrace (Solaris 10 3/05).
|
||
|
#
|
||
|
# This is measuring disk events that have made it past system caches.
|
||
|
#
|
||
|
# $Id: iotop,v 1.1.1.1 2015/09/30 22:01:06 christos Exp $
|
||
|
#
|
||
|
# USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
|
||
|
# [-m mount_point] [-t top] [interval [count]]
|
||
|
#
|
||
|
# iotop # default output, 5 second intervals
|
||
|
#
|
||
|
# -C # don't clear the screen
|
||
|
# -D # print delta times, elapsed, us
|
||
|
# -j # print project ID
|
||
|
# -o # print disk delta times, us
|
||
|
# -P # print %I/O (disk delta times)
|
||
|
# -Z # print zone ID
|
||
|
# -d device # instance name to snoop (eg, dad0)
|
||
|
# -f filename # full pathname of file to snoop
|
||
|
# -m mount_point # this FS only (will skip raw events)
|
||
|
# -t top # print top number only
|
||
|
# eg,
|
||
|
# iotop 1 # 1 second samples
|
||
|
# iotop -C # don't clear the screen
|
||
|
# iotop -P # print %I/O (time based)
|
||
|
# iotop -j # print project IDs
|
||
|
# iotop -Z # print zone IDs
|
||
|
# iotop -t 20 # print top 20 lines only
|
||
|
# iotop -C 5 12 # print 12 x 5 second samples
|
||
|
#
|
||
|
# FIELDS:
|
||
|
# UID user ID
|
||
|
# PID process ID
|
||
|
# PPID parent process ID
|
||
|
# PROJ project ID
|
||
|
# ZONE zone ID
|
||
|
# CMD process command name
|
||
|
# DEVICE device name
|
||
|
# MAJ device major number
|
||
|
# MIN device minor number
|
||
|
# D direction, Read or Write
|
||
|
# BYTES total size of operations, bytes
|
||
|
# ELAPSED total elapsed from request to completion, us
|
||
|
# DISKTIME total time for disk to complete request, us
|
||
|
# %I/O percent disk I/O, based on time (DISKTIME)
|
||
|
# load 1 min load average
|
||
|
# disk_r total disk read Kbytes for sample
|
||
|
# disk_w total disk write Kbytes for sample
|
||
|
#
|
||
|
# NOTE:
|
||
|
# * There are two different delta times reported. -D prints the
|
||
|
# elapsed time from the disk request (strategy) to the disk completion
|
||
|
# (iodone); -o prints the time for the disk to complete that event
|
||
|
# since it's last event (time between iodones), or, the time to the
|
||
|
# strategy if the disk had been idle.
|
||
|
# * The %I/O value can exceed 100%. It represents how busy a process is
|
||
|
# making the disks, in terms of a single disk. A value of 200% could
|
||
|
# mean 2 disks are busy at 100%, or 4 disks at 50%...
|
||
|
#
|
||
|
# SEE ALSO: iosnoop
|
||
|
# BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
|
||
|
# Solaris Dynamic Tracing Guide, http://docs.sun.com
|
||
|
# DTrace Tools, http://www.brendangregg.com/dtrace.html
|
||
|
#
|
||
|
# INSPIRATION: top(1) by William LeFebvre
|
||
|
#
|
||
|
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
|
||
|
#
|
||
|
# CDDL HEADER START
|
||
|
#
|
||
|
# The contents of this file are subject to the terms of the
|
||
|
# Common Development and Distribution License, Version 1.0 only
|
||
|
# (the "License"). You may not use this file except in compliance
|
||
|
# with the License.
|
||
|
#
|
||
|
# You can obtain a copy of the license at Docs/cddl1.txt
|
||
|
# or http://www.opensolaris.org/os/licensing.
|
||
|
# See the License for the specific language governing permissions
|
||
|
# and limitations under the License.
|
||
|
#
|
||
|
# CDDL HEADER END
|
||
|
#
|
||
|
# KNOWN BUGS:
|
||
|
# - This can print errors while running on servers with Veritas volumes.
|
||
|
#
|
||
|
# Author: Brendan Gregg [Sydney, Australia]
|
||
|
#
|
||
|
# 15-Jul-2005 Brendan Gregg Created this.
|
||
|
# 20-Apr-2006 " " Last update.
|
||
|
#
|
||
|
|
||
|
|
||
|
##############################
|
||
|
# --- Process Arguments ---
|
||
|
#
|
||
|
|
||
|
### default variables
|
||
|
opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
|
||
|
opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
|
||
|
opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0
|
||
|
|
||
|
### process options
|
||
|
while getopts CDd:f:hjm:oPt:Z name
|
||
|
do
|
||
|
case $name in
|
||
|
C) opt_clear=0 ;;
|
||
|
D) opt_elapsed=1; opt_bytes=0 ;;
|
||
|
d) opt_device=1; device=$OPTARG ;;
|
||
|
f) opt_file=1; filename=$OPTARG ;;
|
||
|
j) opt_proj=1; opt_def=0 ;;
|
||
|
m) opt_mount=1; mount=$OPTARG ;;
|
||
|
o) opt_dtime=1; opt_bytes=0 ;;
|
||
|
P) opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
|
||
|
t) opt_top=1; top=$OPTARG ;;
|
||
|
Z) opt_zone=1; opt_def=0 ;;
|
||
|
h|?) cat <<-END >&2
|
||
|
USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
|
||
|
[-m mount_point] [-t top] [interval [count]]
|
||
|
|
||
|
-C # don't clear the screen
|
||
|
-D # print delta times, elapsed, us
|
||
|
-j # print project ID
|
||
|
-o # print disk delta times, us
|
||
|
-P # print %I/O (disk delta times)
|
||
|
-Z # print zone ID
|
||
|
-d device # instance name to snoop
|
||
|
-f filename # snoop this file only
|
||
|
-m mount_point # this FS only
|
||
|
-t top # print top number only
|
||
|
eg,
|
||
|
iotop # default output, 5 second samples
|
||
|
iotop 1 # 1 second samples
|
||
|
iotop -P # print %I/O (time based)
|
||
|
iotop -m / # snoop events on filesystem / only
|
||
|
iotop -t 20 # print top 20 lines only
|
||
|
iotop -C 5 12 # print 12 x 5 second samples
|
||
|
END
|
||
|
exit 1
|
||
|
esac
|
||
|
done
|
||
|
|
||
|
shift $(( $OPTIND - 1 ))
|
||
|
|
||
|
### option logic
|
||
|
if [[ "$1" > 0 ]]; then
|
||
|
interval=$1; shift
|
||
|
fi
|
||
|
if [[ "$1" > 0 ]]; then
|
||
|
count=$1; shift
|
||
|
fi
|
||
|
if (( opt_proj && opt_zone )); then
|
||
|
opt_proj=0
|
||
|
fi
|
||
|
if (( opt_elapsed && opt_dtime )); then
|
||
|
opt_elapsed=0
|
||
|
fi
|
||
|
if (( opt_device || opt_mount || opt_file )); then
|
||
|
filter=1
|
||
|
fi
|
||
|
if (( opt_clear )); then
|
||
|
clearstr=`clear`
|
||
|
else
|
||
|
clearstr=.
|
||
|
fi
|
||
|
|
||
|
|
||
|
|
||
|
#################################
|
||
|
# --- Main Program, DTrace ---
|
||
|
#
|
||
|
/usr/sbin/dtrace -n '
|
||
|
/*
|
||
|
* Command line arguments
|
||
|
*/
|
||
|
inline int OPT_def = '$opt_def';
|
||
|
inline int OPT_proj = '$opt_proj';
|
||
|
inline int OPT_zone = '$opt_zone';
|
||
|
inline int OPT_clear = '$opt_clear';
|
||
|
inline int OPT_bytes = '$opt_bytes';
|
||
|
inline int OPT_elapsed = '$opt_elapsed';
|
||
|
inline int OPT_dtime = '$opt_dtime';
|
||
|
inline int OPT_percent = '$opt_percent';
|
||
|
inline int OPT_device = '$opt_device';
|
||
|
inline int OPT_mount = '$opt_mount';
|
||
|
inline int OPT_file = '$opt_file';
|
||
|
inline int OPT_top = '$opt_top';
|
||
|
inline int INTERVAL = '$interval';
|
||
|
inline int COUNTER = '$count';
|
||
|
inline int FILTER = '$filter';
|
||
|
inline int TOP = '$top';
|
||
|
inline string DEVICE = "'$device'";
|
||
|
inline string FILENAME = "'$filename'";
|
||
|
inline string MOUNT = "'$mount'";
|
||
|
inline string CLEAR = "'$clearstr'";
|
||
|
|
||
|
#pragma D option quiet
|
||
|
|
||
|
/* boost the following if you get "dynamic variable drops" */
|
||
|
#pragma D option dynvarsize=8m
|
||
|
|
||
|
/*
|
||
|
* Print header
|
||
|
*/
|
||
|
dtrace:::BEGIN
|
||
|
{
|
||
|
last_event[""] = 0;
|
||
|
|
||
|
/* starting values */
|
||
|
counts = COUNTER;
|
||
|
secs = INTERVAL;
|
||
|
disk_r = 0;
|
||
|
disk_w = 0;
|
||
|
|
||
|
printf("Tracing... Please wait.\n");
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Check event is being traced
|
||
|
*/
|
||
|
io:genunix::start,
|
||
|
io:genunix::done
|
||
|
{
|
||
|
/* default is to trace unless filtering, */
|
||
|
this->ok = FILTER ? 0 : 1;
|
||
|
|
||
|
/* check each filter, */
|
||
|
(OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
|
||
|
(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
|
||
|
(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Reset last_event for disk idle -> start
|
||
|
* this prevents idle time being counted as disk time.
|
||
|
*/
|
||
|
io:genunix::start
|
||
|
/! pending[args[1]->dev_statname]/
|
||
|
{
|
||
|
/* save last disk event */
|
||
|
last_event[args[1]->dev_statname] = timestamp;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Store entry details
|
||
|
*/
|
||
|
io:genunix::start
|
||
|
/this->ok/
|
||
|
{
|
||
|
/* these are used as a unique disk event key, */
|
||
|
this->dev = args[0]->b_edev;
|
||
|
this->blk = args[0]->b_blkno;
|
||
|
|
||
|
/* save disk event details, */
|
||
|
start_uid[this->dev, this->blk] = uid;
|
||
|
start_pid[this->dev, this->blk] = pid;
|
||
|
start_ppid[this->dev, this->blk] = ppid;
|
||
|
start_comm[this->dev, this->blk] = execname;
|
||
|
start_time[this->dev, this->blk] = timestamp;
|
||
|
start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
|
||
|
start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
|
||
|
start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";
|
||
|
disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
|
||
|
disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;
|
||
|
|
||
|
/* increase disk event pending count */
|
||
|
pending[args[1]->dev_statname]++;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Process and Print completion
|
||
|
*/
|
||
|
io:genunix::done
|
||
|
/this->ok/
|
||
|
{
|
||
|
/* decrease disk event pending count */
|
||
|
pending[args[1]->dev_statname]--;
|
||
|
|
||
|
/*
|
||
|
* Process details
|
||
|
*/
|
||
|
|
||
|
/* fetch entry values */
|
||
|
this->dev = args[0]->b_edev;
|
||
|
this->blk = args[0]->b_blkno;
|
||
|
this->suid = start_uid[this->dev, this->blk];
|
||
|
this->spid = start_pid[this->dev, this->blk];
|
||
|
this->sppid = start_ppid[this->dev, this->blk];
|
||
|
this->sproj = start_proj[this->dev, this->blk];
|
||
|
this->szone = start_zone[this->dev, this->blk];
|
||
|
self->scomm = start_comm[this->dev, this->blk];
|
||
|
this->stime = start_time[this->dev, this->blk];
|
||
|
this->etime = timestamp; /* endtime */
|
||
|
this->elapsed = this->etime - this->stime;
|
||
|
self->rw = start_rw[this->dev, this->blk];
|
||
|
this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
|
||
|
timestamp - last_event[args[1]->dev_statname];
|
||
|
|
||
|
/* memory cleanup */
|
||
|
start_uid[this->dev, this->blk] = 0;
|
||
|
start_pid[this->dev, this->blk] = 0;
|
||
|
start_ppid[this->dev, this->blk] = 0;
|
||
|
start_time[this->dev, this->blk] = 0;
|
||
|
start_comm[this->dev, this->blk] = 0;
|
||
|
start_zone[this->dev, this->blk] = 0;
|
||
|
start_proj[this->dev, this->blk] = 0;
|
||
|
start_rw[this->dev, this->blk] = 0;
|
||
|
|
||
|
/*
|
||
|
* Choose statistic to track
|
||
|
*/
|
||
|
OPT_bytes ? this->value = args[0]->b_bcount : 1;
|
||
|
OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
|
||
|
OPT_dtime ? this->value = this->dtime / 1000 : 1;
|
||
|
|
||
|
/*
|
||
|
* Save details
|
||
|
*/
|
||
|
OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
|
||
|
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
|
||
|
self->rw] = sum(this->value) : 1;
|
||
|
OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
|
||
|
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
|
||
|
self->rw] = sum(this->value) : 1;
|
||
|
OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
|
||
|
args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
|
||
|
self->rw] = sum(this->value) : 1;
|
||
|
|
||
|
/* save last disk event */
|
||
|
last_event[args[1]->dev_statname] = timestamp;
|
||
|
|
||
|
self->scomm = 0;
|
||
|
self->rw = 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Prevent pending from underflowing
|
||
|
* this can happen if this program is started during disk events.
|
||
|
*/
|
||
|
io:genunix::done
|
||
|
/pending[args[1]->dev_statname] < 0/
|
||
|
{
|
||
|
pending[args[1]->dev_statname] = 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Timer
|
||
|
*/
|
||
|
profile:::tick-1sec
|
||
|
{
|
||
|
secs--;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Print Report
|
||
|
*/
|
||
|
profile:::tick-1sec
|
||
|
/secs == 0/
|
||
|
{
|
||
|
/* fetch 1 min load average */
|
||
|
this->load1a = `hp_avenrun[0] / 65536;
|
||
|
this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536;
|
||
|
|
||
|
/* convert counters to Kbytes */
|
||
|
disk_r /= 1024;
|
||
|
disk_w /= 1024;
|
||
|
|
||
|
/* print status */
|
||
|
OPT_clear ? printf("%s", CLEAR) : 1;
|
||
|
printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
|
||
|
walltimestamp, this->load1a, this->load1b, disk_r, disk_w);
|
||
|
|
||
|
/* print headers */
|
||
|
OPT_def ? printf(" UID ") : 1;
|
||
|
OPT_proj ? printf(" PROJ ") : 1;
|
||
|
OPT_zone ? printf(" ZONE ") : 1;
|
||
|
printf("%6s %6s %-16s %-7s %3s %3s %1s",
|
||
|
"PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
|
||
|
OPT_bytes ? printf(" %16s\n", "BYTES") : 1;
|
||
|
OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
|
||
|
OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
|
||
|
OPT_dtime && OPT_percent ? printf(" %6s\n", "%I/O") : 1;
|
||
|
|
||
|
/* truncate to top lines if needed */
|
||
|
OPT_top ? trunc(@out, TOP) : 1;
|
||
|
|
||
|
/* normalise to percentage if needed */
|
||
|
OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;
|
||
|
|
||
|
/* print data */
|
||
|
! OPT_percent ?
|
||
|
printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
|
||
|
printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
|
||
|
printf("\n");
|
||
|
|
||
|
/* clear data */
|
||
|
trunc(@out);
|
||
|
disk_r = 0;
|
||
|
disk_w = 0;
|
||
|
secs = INTERVAL;
|
||
|
counts--;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* End of program
|
||
|
*/
|
||
|
profile:::tick-1sec
|
||
|
/counts == 0/
|
||
|
{
|
||
|
exit(0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Cleanup for Ctrl-C
|
||
|
*/
|
||
|
dtrace:::END
|
||
|
{
|
||
|
trunc(@out);
|
||
|
}
|
||
|
'
|