qemu/scripts/kvm/kvm_stat
Paolo Bonzini b763adf1a6 kvm_stat: allow choosing between tracepoints and old stats
The old stats contain information not available in the tracepoints.
By default, keep the old behavior, but allow choosing which set of stats
to present, or even both.

Inspired by a patch from Marcelo Tosatti.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2014-05-21 17:56:37 +02:00

534 lines
15 KiB
Python
Executable File

#!/usr/bin/python
#
# top-like utility for displaying kvm statistics
#
# Copyright 2006-2008 Qumranet Technologies
# Copyright 2008-2011 Red Hat, Inc.
#
# Authors:
# Avi Kivity <avi@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
import curses
import sys, os, time, optparse
class DebugfsProvider(object):
def __init__(self):
self.base = '/sys/kernel/debug/kvm'
self._fields = os.listdir(self.base)
def fields(self):
return self._fields
def select(self, fields):
self._fields = fields
def read(self):
def val(key):
return int(file(self.base + '/' + key).read())
return dict([(key, val(key)) for key in self._fields])
vmx_exit_reasons = {
0: 'EXCEPTION_NMI',
1: 'EXTERNAL_INTERRUPT',
2: 'TRIPLE_FAULT',
7: 'PENDING_INTERRUPT',
8: 'NMI_WINDOW',
9: 'TASK_SWITCH',
10: 'CPUID',
12: 'HLT',
14: 'INVLPG',
15: 'RDPMC',
16: 'RDTSC',
18: 'VMCALL',
19: 'VMCLEAR',
20: 'VMLAUNCH',
21: 'VMPTRLD',
22: 'VMPTRST',
23: 'VMREAD',
24: 'VMRESUME',
25: 'VMWRITE',
26: 'VMOFF',
27: 'VMON',
28: 'CR_ACCESS',
29: 'DR_ACCESS',
30: 'IO_INSTRUCTION',
31: 'MSR_READ',
32: 'MSR_WRITE',
33: 'INVALID_STATE',
36: 'MWAIT_INSTRUCTION',
39: 'MONITOR_INSTRUCTION',
40: 'PAUSE_INSTRUCTION',
41: 'MCE_DURING_VMENTRY',
43: 'TPR_BELOW_THRESHOLD',
44: 'APIC_ACCESS',
48: 'EPT_VIOLATION',
49: 'EPT_MISCONFIG',
54: 'WBINVD',
55: 'XSETBV',
}
svm_exit_reasons = {
0x000: 'READ_CR0',
0x003: 'READ_CR3',
0x004: 'READ_CR4',
0x008: 'READ_CR8',
0x010: 'WRITE_CR0',
0x013: 'WRITE_CR3',
0x014: 'WRITE_CR4',
0x018: 'WRITE_CR8',
0x020: 'READ_DR0',
0x021: 'READ_DR1',
0x022: 'READ_DR2',
0x023: 'READ_DR3',
0x024: 'READ_DR4',
0x025: 'READ_DR5',
0x026: 'READ_DR6',
0x027: 'READ_DR7',
0x030: 'WRITE_DR0',
0x031: 'WRITE_DR1',
0x032: 'WRITE_DR2',
0x033: 'WRITE_DR3',
0x034: 'WRITE_DR4',
0x035: 'WRITE_DR5',
0x036: 'WRITE_DR6',
0x037: 'WRITE_DR7',
0x040: 'EXCP_BASE',
0x060: 'INTR',
0x061: 'NMI',
0x062: 'SMI',
0x063: 'INIT',
0x064: 'VINTR',
0x065: 'CR0_SEL_WRITE',
0x066: 'IDTR_READ',
0x067: 'GDTR_READ',
0x068: 'LDTR_READ',
0x069: 'TR_READ',
0x06a: 'IDTR_WRITE',
0x06b: 'GDTR_WRITE',
0x06c: 'LDTR_WRITE',
0x06d: 'TR_WRITE',
0x06e: 'RDTSC',
0x06f: 'RDPMC',
0x070: 'PUSHF',
0x071: 'POPF',
0x072: 'CPUID',
0x073: 'RSM',
0x074: 'IRET',
0x075: 'SWINT',
0x076: 'INVD',
0x077: 'PAUSE',
0x078: 'HLT',
0x079: 'INVLPG',
0x07a: 'INVLPGA',
0x07b: 'IOIO',
0x07c: 'MSR',
0x07d: 'TASK_SWITCH',
0x07e: 'FERR_FREEZE',
0x07f: 'SHUTDOWN',
0x080: 'VMRUN',
0x081: 'VMMCALL',
0x082: 'VMLOAD',
0x083: 'VMSAVE',
0x084: 'STGI',
0x085: 'CLGI',
0x086: 'SKINIT',
0x087: 'RDTSCP',
0x088: 'ICEBP',
0x089: 'WBINVD',
0x08a: 'MONITOR',
0x08b: 'MWAIT',
0x08c: 'MWAIT_COND',
0x400: 'NPF',
}
s390_exit_reasons = {
0x000: 'UNKNOWN',
0x001: 'EXCEPTION',
0x002: 'IO',
0x003: 'HYPERCALL',
0x004: 'DEBUG',
0x005: 'HLT',
0x006: 'MMIO',
0x007: 'IRQ_WINDOW_OPEN',
0x008: 'SHUTDOWN',
0x009: 'FAIL_ENTRY',
0x010: 'INTR',
0x011: 'SET_TPR',
0x012: 'TPR_ACCESS',
0x013: 'S390_SIEIC',
0x014: 'S390_RESET',
0x015: 'DCR',
0x016: 'NMI',
0x017: 'INTERNAL_ERROR',
0x018: 'OSI',
0x019: 'PAPR_HCALL',
}
vendor_exit_reasons = {
'vmx': vmx_exit_reasons,
'svm': svm_exit_reasons,
'IBM/S390': s390_exit_reasons,
}
syscall_numbers = {
'IBM/S390': 331,
}
sc_perf_evt_open = 298
exit_reasons = None
for line in file('/proc/cpuinfo').readlines():
if line.startswith('flags') or line.startswith('vendor_id'):
for flag in line.split():
if flag in vendor_exit_reasons:
exit_reasons = vendor_exit_reasons[flag]
if flag in syscall_numbers:
sc_perf_evt_open = syscall_numbers[flag]
filters = {
'kvm_exit': ('exit_reason', exit_reasons)
}
def invert(d):
return dict((x[1], x[0]) for x in d.iteritems())
for f in filters:
filters[f] = (filters[f][0], invert(filters[f][1]))
import ctypes, struct, array
libc = ctypes.CDLL('libc.so.6')
syscall = libc.syscall
class perf_event_attr(ctypes.Structure):
_fields_ = [('type', ctypes.c_uint32),
('size', ctypes.c_uint32),
('config', ctypes.c_uint64),
('sample_freq', ctypes.c_uint64),
('sample_type', ctypes.c_uint64),
('read_format', ctypes.c_uint64),
('flags', ctypes.c_uint64),
('wakeup_events', ctypes.c_uint32),
('bp_type', ctypes.c_uint32),
('bp_addr', ctypes.c_uint64),
('bp_len', ctypes.c_uint64),
]
def _perf_event_open(attr, pid, cpu, group_fd, flags):
return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
ctypes.c_int(cpu), ctypes.c_int(group_fd),
ctypes.c_long(flags))
PERF_TYPE_HARDWARE = 0
PERF_TYPE_SOFTWARE = 1
PERF_TYPE_TRACEPOINT = 2
PERF_TYPE_HW_CACHE = 3
PERF_TYPE_RAW = 4
PERF_TYPE_BREAKPOINT = 5
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10
PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
PERF_FORMAT_ID = 1 << 2
PERF_FORMAT_GROUP = 1 << 3
import re
sys_tracing = '/sys/kernel/debug/tracing'
class Group(object):
def __init__(self, cpu):
self.events = []
self.group_leader = None
self.cpu = cpu
def add_event(self, name, event_set, tracepoint, filter = None):
self.events.append(Event(group = self,
name = name, event_set = event_set,
tracepoint = tracepoint, filter = filter))
if len(self.events) == 1:
self.file = os.fdopen(self.events[0].fd)
def read(self):
bytes = 8 * (1 + len(self.events))
fmt = 'xxxxxxxx' + 'q' * len(self.events)
return dict(zip([event.name for event in self.events],
struct.unpack(fmt, self.file.read(bytes))))
class Event(object):
def __init__(self, group, name, event_set, tracepoint, filter = None):
self.name = name
attr = perf_event_attr()
attr.type = PERF_TYPE_TRACEPOINT
attr.size = ctypes.sizeof(attr)
id_path = os.path.join(sys_tracing, 'events', event_set,
tracepoint, 'id')
id = int(file(id_path).read())
attr.config = id
attr.sample_type = (PERF_SAMPLE_RAW
| PERF_SAMPLE_TIME
| PERF_SAMPLE_CPU)
attr.sample_period = 1
attr.read_format = PERF_FORMAT_GROUP
group_leader = -1
if group.events:
group_leader = group.events[0].fd
fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
if fd == -1:
raise Exception('perf_event_open failed')
if filter:
import fcntl
fcntl.ioctl(fd, 0x40082406, filter)
self.fd = fd
def enable(self):
import fcntl
fcntl.ioctl(self.fd, 0x00002400, 0)
def disable(self):
import fcntl
fcntl.ioctl(self.fd, 0x00002401, 0)
class TracepointProvider(object):
def __init__(self):
path = os.path.join(sys_tracing, 'events', 'kvm')
fields = [f
for f in os.listdir(path)
if os.path.isdir(os.path.join(path, f))]
extra = []
for f in fields:
if f in filters:
subfield, values = filters[f]
for name, number in values.iteritems():
extra.append(f + '(' + name + ')')
fields += extra
self._setup(fields)
self.select(fields)
def fields(self):
return self._fields
def _setup(self, _fields):
self._fields = _fields
cpure = r'cpu([0-9]+)'
self.cpus = [int(re.match(cpure, x).group(1))
for x in os.listdir('/sys/devices/system/cpu')
if re.match(cpure, x)]
import resource
nfiles = len(self.cpus) * 1000
resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
events = []
self.group_leaders = []
for cpu in self.cpus:
group = Group(cpu)
for name in _fields:
tracepoint = name
filter = None
m = re.match(r'(.*)\((.*)\)', name)
if m:
tracepoint, sub = m.groups()
filter = '%s==%d\0' % (filters[tracepoint][0],
filters[tracepoint][1][sub])
event = group.add_event(name, event_set = 'kvm',
tracepoint = tracepoint,
filter = filter)
self.group_leaders.append(group)
def select(self, fields):
for group in self.group_leaders:
for event in group.events:
if event.name in fields:
event.enable()
else:
event.disable()
def read(self):
from collections import defaultdict
ret = defaultdict(int)
for group in self.group_leaders:
for name, val in group.read().iteritems():
ret[name] += val
return ret
class Stats:
def __init__(self, providers, fields = None):
self.providers = providers
self.fields_filter = fields
self._update()
def _update(self):
def wanted(key):
import re
if not self.fields_filter:
return True
return re.match(self.fields_filter, key) is not None
self.values = dict()
for d in providers:
provider_fields = [key for key in d.fields() if wanted(key)]
for key in provider_fields:
self.values[key] = None
d.select(provider_fields)
def set_fields_filter(self, fields_filter):
self.fields_filter = fields_filter
self._update()
def get(self):
for d in providers:
new = d.read()
for key in d.fields():
oldval = self.values.get(key, (0, 0))
newval = new[key]
newdelta = None
if oldval is not None:
newdelta = newval - oldval[0]
self.values[key] = (newval, newdelta)
return self.values
if not os.access('/sys/kernel/debug', os.F_OK):
print 'Please enable CONFIG_DEBUG_FS in your kernel'
sys.exit(1)
if not os.access('/sys/kernel/debug/kvm', os.F_OK):
print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
print "and ensure the kvm modules are loaded"
sys.exit(1)
label_width = 40
number_width = 10
def tui(screen, stats):
curses.use_default_colors()
curses.noecho()
drilldown = False
fields_filter = stats.fields_filter
def update_drilldown():
if not fields_filter:
if drilldown:
stats.set_fields_filter(None)
else:
stats.set_fields_filter(r'^[^\(]*$')
update_drilldown()
def refresh(sleeptime):
screen.erase()
screen.addstr(0, 0, 'kvm statistics')
row = 2
s = stats.get()
def sortkey(x):
if s[x][1]:
return (-s[x][1], -s[x][0])
else:
return (0, -s[x][0])
for key in sorted(s.keys(), key = sortkey):
if row >= screen.getmaxyx()[0]:
break
values = s[key]
if not values[0] and not values[1]:
break
col = 1
screen.addstr(row, col, key)
col += label_width
screen.addstr(row, col, '%10d' % (values[0],))
col += number_width
if values[1] is not None:
screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
row += 1
screen.refresh()
sleeptime = 0.25
while True:
refresh(sleeptime)
curses.halfdelay(int(sleeptime * 10))
sleeptime = 3
try:
c = screen.getkey()
if c == 'x':
drilldown = not drilldown
update_drilldown()
if c == 'q':
break
except KeyboardInterrupt:
break
except curses.error:
continue
def batch(stats):
s = stats.get()
time.sleep(1)
s = stats.get()
for key in sorted(s.keys()):
values = s[key]
print '%-22s%10d%10d' % (key, values[0], values[1])
def log(stats):
keys = sorted(stats.get().iterkeys())
def banner():
for k in keys:
print '%10s' % k[0:9],
print
def statline():
s = stats.get()
for k in keys:
print ' %9d' % s[k][1],
print
line = 0
banner_repeat = 20
while True:
time.sleep(1)
if line % banner_repeat == 0:
banner()
statline()
line += 1
options = optparse.OptionParser()
options.add_option('-1', '--once', '--batch',
action = 'store_true',
default = False,
dest = 'once',
help = 'run in batch mode for one second',
)
options.add_option('-l', '--log',
action = 'store_true',
default = False,
dest = 'log',
help = 'run in logging mode (like vmstat)',
)
options.add_option('-t', '--tracepoints',
action = 'store_true',
default = False,
dest = 'tracepoints',
help = 'retrieve statistics from tracepoints',
)
options.add_option('-d', '--debugfs',
action = 'store_true',
default = False,
dest = 'debugfs',
help = 'retrieve statistics from debugfs',
)
options.add_option('-f', '--fields',
action = 'store',
default = None,
dest = 'fields',
help = 'fields to display (regex)',
)
(options, args) = options.parse_args(sys.argv)
providers = []
if options.tracepoints:
providers.append(TracepointProvider())
if options.debugfs:
providers.append(DebugfsProvider())
if len(providers) == 0:
try:
providers = [TracepointProvider()]
except:
providers = [DebugfsProvider()]
stats = Stats(providers, fields = options.fields)
if options.log:
log(stats)
elif not options.once:
import curses.wrapper
curses.wrapper(tui, stats)
else:
batch(stats)