NetBSD/usr.sbin/tprof
lukem d877c4c3c0 Enable WARNS=4 by default, except for:
cpuctl  dumplfs  hprop  ipf  iprop-log  kadmin  kcm  kdc  kdigest
	kimpersonate  kstash  ktutil  makefs  ndbootd  ntp  pppd  quot
	racoon  racoonctl  rtadvd  sntp  sup  tcpdchk  tcpdmatch  tcpdump
	traceroute  traceroute6  user  veriexecgen  wsmoused  zic
(Mostly third-party applications)
2009-04-22 15:23:01 +00:00
..
Makefile Enable WARNS=4 by default, except for: 2009-04-22 15:23:01 +00:00
README
tpann.sh
tpfmt.sh
tprof.c fix an error message. 2009-01-26 05:53:10 +00:00

$NetBSD: README,v 1.9 2008/01/30 14:16:42 ad Exp $

NOTE:
	- tprof driver currently only supports pentium4 (netburst) processors.
	- it samples the program counter on every PMI (performance monitoring
	  interrupt).
	- it's currently hardcoded to use the global_power_events event.
	  for details, see x86/x86/tprof_pmi.c and Intel's processor manuals.

usage:

0. set the SIZEOF_PTR environment variable, which is used by tpfmt.sh and
   tpann.sh.  if it is not set, SIZEOF_PTR=4 is assumed.

1. add a line to your kernel config.

	pseudo-device	tprof

2. create a device special file.

	# mknod /dev/tprof c 191 0

3. run the tprof command.

	# tprof -o /tmp/foo sleep 1

	tprof statistics:
		sample 57
		overflow 0
		buf 3
		emptybuf 3
		dropbuf 0
		dropbuf_sample 0

4. format the result.
   the first line in the following example means that 11 samples have been
   taken at 0xc0396c36, whose symbolic name is lapic_gettick+0x6.

	# sh ./tpfmt.sh < /tmp/foo
	11      c0396c36        lapic_gettick+0x6
	5       c039b98a        x86_pause+0x2
	4       c010cf9d        __cpu_simple_lock+0xd
	2       c010cfcd        __cpu_simple_lock_try+0xd
	2       c039b571        bus_space_read_4+0x11
	1       c01005c8        sse2_zero_page+0x18
	1       c0100624        sse2_copy_page+0x34
	1       c010ceeb        mutex_spin_enter+0x2b
	1       c010cef5        mutex_spin_enter+0x35
	1       c010cf32        mutex_spin_exit+0x32
	1       c0119ed0        in_localaddr+0x30
	1       c012d0fd        tcp_output+0x1fbd
	1       c02980c2        amap_copy+0x42
	1       c02a0100        uvm_map_lookup_entry_bytree+0x20
	1       c02a27fe        uvm_tree_RB_REMOVE+0xee
	1       c02a8914        uvm_pagelookup+0x4
	1       c02a9d5c        uvm_pagefree+0xfc
	1       c02a9e36        uvm_pagefree+0x1d6
	1       c02dd9d1        _kernel_unlock+0xa1
	1       c02e0285        mutex_vector_enter+0x15
	1       c02eb83a        sleepq_wake+0x5a
	1       c0303467        pool_cache_get_paddr+0x97
	1       c030368b        pool_cache_put_slow+0x6b
	1       c0321ed3        pffasttimo+0x33
	1       c034547a        VOP_LOCK+0xa
	1       c0346235        VOP_ACCESS+0x45
	1       c034a749        genfs_unlock+0x29
	1       c038f251        cpu_idle+0x31
	1       c03938da        pmap_write_protect+0xaa
	1       c0394305        pmap_do_remove+0x2e5
	1       c03944b3        pmap_do_remove+0x493
	1       c0396cdf        lapic_delay+0x5f
	1       c0396d19        lapic_delay+0x99
	1       c0396d1d        lapic_delay+0x9d
	1       c0397429        lapic_clockintr+0x19
	1       c039b984        x86_mwait+0xc
	1       c042f66a        _atomic_swap_32+0xa

5. tpann.sh is another formatter.  it outputs "objdump -d" annotated with the
   number of samples taken at each address.

	# tprof -o /tmp/bar sleep 100
	# sh ./tpann.sh < /tmp/bar

		:
		snip
		:

	c01005e0 <sse2_zero_page>:
	       4  c01005e0:     55                      push   %ebp
	      11  c01005e1:     89 e5                   mov    %esp,%ebp
	       1  c01005e3:     8b 54 24 08             mov    0x8(%esp),%edx
	       3  c01005e7:     b9 00 10 00 00          mov    $0x1000,%ecx
	       1  c01005ec:     31 c0                   xor    %eax,%eax
	       1  c01005ee:     89 f6                   mov    %esi,%esi
	    7936  c01005f0:     0f c3 42 00             movnti %eax,0x0(%edx)
	    6371  c01005f4:     0f c3 42 04             movnti %eax,0x4(%edx)
	    1220  c01005f8:     0f c3 42 08             movnti %eax,0x8(%edx)
	     741  c01005fc:     0f c3 42 0c             movnti %eax,0xc(%edx)
	    1178  c0100600:     0f c3 42 10             movnti %eax,0x10(%edx)
	    1334  c0100604:     0f c3 42 14             movnti %eax,0x14(%edx)
	     976  c0100608:     0f c3 42 18             movnti %eax,0x18(%edx)
	    1299  c010060c:     0f c3 42 1c             movnti %eax,0x1c(%edx)
	     954  c0100610:     83 e9 20                sub    $0x20,%ecx
	      45  c0100613:     8d 52 20                lea    0x20(%edx),%edx
	     238  c0100616:     75 d8                   jne    c01005f0 <sse2_zero_page+0x10>
	      71  c0100618:     0f ae f8                sfence 
	     297  c010061b:     5d                      pop    %ebp
	      19  c010061c:     c3                      ret    
	       0  c010061d:     8d 76 00                lea    0x0(%esi),%esi

		:
		snip
		: