113 lines
4.1 KiB
Plaintext
113 lines
4.1 KiB
Plaintext
$NetBSD: README,v 1.9 2008/01/30 14:16:42 ad Exp $
|
|
|
|
NOTE:
|
|
- the tprof driver currently supports only Pentium 4 (NetBurst) processors.
|
|
- it samples the program counter on every PMI.
|
|
- it's currently hardcoded to use the global_power_events event.
|
|
for details, see x86/x86/tprof_pmi.c and Intel's processor manuals.
|
|
|
|
usage:
|
|
|
|
0. set the SIZEOF_PTR environment variable, which is used by tpfmt.sh and tpann.sh.
|
|
if not set, SIZEOF_PTR=4 is assumed.
|
|
|
|
1. add a line to your kernel config.
|
|
|
|
pseudo-device tprof
|
|
|
|
2. create a device special file.
|
|
|
|
# mknod /dev/tprof c 191 0
|
|
|
|
3. run the tprof command.
|
|
|
|
# tprof -o /tmp/foo sleep 1
|
|
|
|
tprof statistics:
|
|
sample 57
|
|
overflow 0
|
|
buf 3
|
|
emptybuf 3
|
|
dropbuf 0
|
|
dropbuf_sample 0
|
|
|
|
4. format the result.
|
|
the first line in the following example means that 11 samples have been
|
|
taken at 0xc0396c36, whose symbolic name is lapic_gettick+0x6.
|
|
|
|
# sh ./tpfmt.sh < /tmp/foo
|
|
11 c0396c36 lapic_gettick+0x6
|
|
5 c039b98a x86_pause+0x2
|
|
4 c010cf9d __cpu_simple_lock+0xd
|
|
2 c010cfcd __cpu_simple_lock_try+0xd
|
|
2 c039b571 bus_space_read_4+0x11
|
|
1 c01005c8 sse2_zero_page+0x18
|
|
1 c0100624 sse2_copy_page+0x34
|
|
1 c010ceeb mutex_spin_enter+0x2b
|
|
1 c010cef5 mutex_spin_enter+0x35
|
|
1 c010cf32 mutex_spin_exit+0x32
|
|
1 c0119ed0 in_localaddr+0x30
|
|
1 c012d0fd tcp_output+0x1fbd
|
|
1 c02980c2 amap_copy+0x42
|
|
1 c02a0100 uvm_map_lookup_entry_bytree+0x20
|
|
1 c02a27fe uvm_tree_RB_REMOVE+0xee
|
|
1 c02a8914 uvm_pagelookup+0x4
|
|
1 c02a9d5c uvm_pagefree+0xfc
|
|
1 c02a9e36 uvm_pagefree+0x1d6
|
|
1 c02dd9d1 _kernel_unlock+0xa1
|
|
1 c02e0285 mutex_vector_enter+0x15
|
|
1 c02eb83a sleepq_wake+0x5a
|
|
1 c0303467 pool_cache_get_paddr+0x97
|
|
1 c030368b pool_cache_put_slow+0x6b
|
|
1 c0321ed3 pffasttimo+0x33
|
|
1 c034547a VOP_LOCK+0xa
|
|
1 c0346235 VOP_ACCESS+0x45
|
|
1 c034a749 genfs_unlock+0x29
|
|
1 c038f251 cpu_idle+0x31
|
|
1 c03938da pmap_write_protect+0xaa
|
|
1 c0394305 pmap_do_remove+0x2e5
|
|
1 c03944b3 pmap_do_remove+0x493
|
|
1 c0396cdf lapic_delay+0x5f
|
|
1 c0396d19 lapic_delay+0x99
|
|
1 c0396d1d lapic_delay+0x9d
|
|
1 c0397429 lapic_clockintr+0x19
|
|
1 c039b984 x86_mwait+0xc
|
|
1 c042f66a _atomic_swap_32+0xa
|
|
|
|
5. tpann.sh is another formatter. it outputs "objdump -d" disassembly annotated
|
|
with the number of samples taken at each address.
|
|
|
|
# tprof -o /tmp/bar sleep 100
|
|
# sh ./tpann.sh < /tmp/bar
|
|
|
|
:
|
|
snip
|
|
:
|
|
|
|
c01005e0 <sse2_zero_page>:
|
|
4 c01005e0: 55 push %ebp
|
|
11 c01005e1: 89 e5 mov %esp,%ebp
|
|
1 c01005e3: 8b 54 24 08 mov 0x8(%esp),%edx
|
|
3 c01005e7: b9 00 10 00 00 mov $0x1000,%ecx
|
|
1 c01005ec: 31 c0 xor %eax,%eax
|
|
1 c01005ee: 89 f6 mov %esi,%esi
|
|
7936 c01005f0: 0f c3 42 00 movnti %eax,0x0(%edx)
|
|
6371 c01005f4: 0f c3 42 04 movnti %eax,0x4(%edx)
|
|
1220 c01005f8: 0f c3 42 08 movnti %eax,0x8(%edx)
|
|
741 c01005fc: 0f c3 42 0c movnti %eax,0xc(%edx)
|
|
1178 c0100600: 0f c3 42 10 movnti %eax,0x10(%edx)
|
|
1334 c0100604: 0f c3 42 14 movnti %eax,0x14(%edx)
|
|
976 c0100608: 0f c3 42 18 movnti %eax,0x18(%edx)
|
|
1299 c010060c: 0f c3 42 1c movnti %eax,0x1c(%edx)
|
|
954 c0100610: 83 e9 20 sub $0x20,%ecx
|
|
45 c0100613: 8d 52 20 lea 0x20(%edx),%edx
|
|
238 c0100616: 75 d8 jne c01005f0 <sse2_zero_page+0x10>
|
|
71 c0100618: 0f ae f8 sfence
|
|
297 c010061b: 5d pop %ebp
|
|
19 c010061c: c3 ret
|
|
0 c010061d: 8d 76 00 lea 0x0(%esi),%esi
|
|
|
|
:
|
|
snip
|
|
:
|