* Increased the maximum profiling caller stack depth significantly.

* Extended the profiling API by an option to record a variable number of
  samples per tick. The stack depth is used as a maximum.
* Added new option "-f" to the "profile" tool. When specified it
  increments the hit counts of all symbols in the full available caller
  stack. I.e. the resulting hit counts will approximate the total time
  spent in each function or any function directly or indirectly called
  by it. Thus "_start" and "main" will usually get 100% and leaf
  functions only what time has actually been spent in them.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@27699 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Ingo Weinhold 2008-09-23 01:08:27 +00:00
parent 114c07e6e2
commit 0135e2e324
4 changed files with 150 additions and 38 deletions

View File

@ -367,6 +367,9 @@ typedef struct {
area_id sample_area; // area into which the sample will be
// written
int32 stack_depth; // number of return address per hit
bool variable_stack_depth;
// variable number of samples per hit;
// cf. debug_profiler_update
} debug_nub_start_profiler;
typedef struct {
@ -549,18 +552,22 @@ typedef struct {
typedef struct {
debug_origin origin;
int32 image_event; // number of the last image event;
// all samples were recorded after
// this event and before the next
// one
int32 stack_depth; // number of return addresses per
// tick
int32 sample_count; // number of samples in the buffer
int32 dropped_ticks; // number of ticks that had been
// dropped, since the buffer was
// full
bool stopped; // if true, the thread is no longer
// being profiled
int32 image_event; // number of the last image event; all
// samples were recorded after this
// event and before the next one
int32 stack_depth; // number of return addresses per tick
int32 sample_count; // number of samples in the buffer
int32 dropped_ticks; // number of ticks that had been
// dropped, since the buffer was full
bool variable_stack_depth;
// the number of samples per hit is
// variable, i.e. the format for the
// samples of a hit in the buffer is
// <n> <sample 1> ... <sample n>
// instead of
// <sample 1> ... <sample stack_depth>
bool stopped; // if true, the thread is no longer
// being profiled
} debug_profiler_update;
// B_DEBUGGER_MESSAGE_HANDED_OVER

View File

@ -16,7 +16,7 @@
// limits
#define B_DEBUG_MIN_PROFILE_INTERVAL 10 /* in us */
#define B_DEBUG_STACK_TRACE_DEPTH 20
#define B_DEBUG_STACK_TRACE_DEPTH 128
#define B_DEBUG_PROFILE_BUFFER_FLUSH_THRESHOLD 70 /* in % */
@ -96,6 +96,8 @@ struct thread_debug_info {
int32 image_event;
// number of the image event when the first sample was written into
// the buffer
bool variable_stack_depth;
// record a variable number of samples per hit
bool buffer_full;
// indicates that the sample buffer is full
union {

View File

@ -51,6 +51,10 @@ static const char* kUsage =
" thread.\n"
" -C - Don't profile child teams. Default is to recursively\n"
" profile all teams created by a profiled team.\n"
" -f - Always analyze the full caller stack. The hit count\n"
" for every encountered function will be incremented.\n"
" This increases the default for the caller stack depth\n"
" (\"-s\") to 64.\n"
" -h, --help - Print this usage info.\n"
" -i <interval> - Use a tick interval of <interval> microseconds.\n"
" Default is 1000 (1 ms). On a fast machine, a shorter\n"
@ -75,7 +79,8 @@ struct Options {
profile_kernel(true),
profile_loading(false),
profile_teams(true),
profile_threads(true)
profile_threads(true),
analyze_full_stack(false)
{
}
@ -86,6 +91,7 @@ struct Options {
bool profile_loading;
bool profile_teams;
bool profile_threads;
bool analyze_full_stack;
};
static Options sOptions;
@ -335,6 +341,16 @@ public:
fTotalHits++;
}
void AddSymbolHit(int32 symbolIndex)
{
fSymbolHits[symbolIndex]++;
}
void AddImageHit()
{
fTotalHits++;
}
Image* GetImage() const
{
return fImage;
@ -376,7 +392,8 @@ public:
fTotalTicks(0),
fUnkownTicks(0),
fDroppedTicks(0),
fInterval(1)
fInterval(1),
fTotalSampleCount(0)
{
}
@ -456,7 +473,8 @@ public:
return NULL;
}
void AddSamples(int32 count, int32 dropped, int32 stackDepth, int32 event)
void AddSamples(int32 count, int32 dropped, int32 stackDepth,
bool variableStackDepth, int32 event)
{
_RemoveObsoleteImages(event);
@ -471,24 +489,76 @@ public:
}
}
count = count / stackDepth * stackDepth;
if (variableStackDepth) {
// Variable stack depth means we have full caller stack analyzes
// enabled.
int32 totalSampleCount = 0;
int32 tickCount = 0;
addr_t* samples = fSamples;
while (count > 0) {
int32 sampleCount = *(samples++);
for (int32 i = 0; i < count; i += stackDepth) {
ThreadImage* image = NULL;
for (int32 k = 0; k < stackDepth; k++) {
addr_t address = fSamples[i + k];
image = FindImage(address);
if (image != NULL) {
image->AddHit(address);
break;
// Sort the samples. This way hits of the same symbol are
// successive and we can avoid incrementing the hit count of the
// same symbol twice. Same for images.
std::sort(samples, samples + sampleCount);
int32 unknownSamples = 0;
ThreadImage* previousImage = NULL;
int32 previousSymbol = -1;
for (int32 i = 0; i < sampleCount; i++) {
addr_t address = samples[i];
ThreadImage* image = FindImage(address);
int32 symbol = -1;
if (image != NULL) {
symbol = image->GetImage()->FindSymbol(address);
if (symbol < 0) {
// TODO: Count unknown image hits?
} else if (symbol != previousSymbol)
image->AddSymbolHit(symbol);
if (image != previousImage)
image->AddImageHit();
} else
unknownSamples++;
previousImage = image;
previousSymbol = symbol;
}
if (unknownSamples == sampleCount)
fUnkownTicks++;
samples += sampleCount;
count -= sampleCount + 1;
tickCount++;
totalSampleCount += sampleCount;
}
if (image == NULL)
fUnkownTicks++;
fTotalTicks += tickCount;
fTotalSampleCount += totalSampleCount;
} else {
count = count / stackDepth * stackDepth;
for (int32 i = 0; i < count; i += stackDepth) {
ThreadImage* image = NULL;
for (int32 k = 0; k < stackDepth; k++) {
addr_t address = fSamples[i + k];
image = FindImage(address);
if (image != NULL) {
image->AddHit(address);
break;
}
}
if (image == NULL)
fUnkownTicks++;
}
fTotalTicks += count / stackDepth;
}
fTotalTicks += count / stackDepth;
fDroppedTicks += dropped;
// re-add the new images
@ -573,6 +643,10 @@ public:
fprintf(sOptions.output, " dropped ticks: %lld (%lld us, %6.2f%%)\n",
fDroppedTicks, fDroppedTicks * fInterval,
100.0 * fDroppedTicks / totalTicks);
if (sOptions.analyze_full_stack) {
fprintf(sOptions.output, " samples/tick: %.1f\n",
(double)fTotalSampleCount / totalTicks);
}
if (imageCount > 0) {
fprintf(sOptions.output, "\n");
@ -649,6 +723,7 @@ private:
int64 fUnkownTicks;
int64 fDroppedTicks;
bigtime_t fInterval;
int64 fTotalSampleCount;
};
@ -786,6 +861,7 @@ public:
message.interval = sOptions.interval;
message.sample_area = sampleArea;
message.stack_depth = sOptions.stack_depth;
message.variable_stack_depth = sOptions.analyze_full_stack;
debug_nub_start_profiler_reply reply;
status_t error = send_debug_message(&fDebugContext,
@ -1114,6 +1190,7 @@ get_id(const char *str, int32 &id)
int
main(int argc, const char* const* argv)
{
int32 stackDepth = 0;
const char* outputFile = NULL;
while (true) {
@ -1123,7 +1200,7 @@ main(int argc, const char* const* argv)
};
opterr = 0; // don't print errors
int c = getopt_long(argc, (char**)argv, "+cChi:klo:s:", sLongOptions,
int c = getopt_long(argc, (char**)argv, "+cCfhi:klo:s:", sLongOptions,
NULL);
if (c == -1)
break;
@ -1135,6 +1212,10 @@ main(int argc, const char* const* argv)
case 'C':
sOptions.profile_teams = false;
break;
case 'f':
sOptions.stack_depth = 64;
sOptions.analyze_full_stack = true;
break;
case 'h':
print_usage_and_exit(false);
break;
@ -1151,7 +1232,7 @@ main(int argc, const char* const* argv)
outputFile = optarg;
break;
case 's':
sOptions.stack_depth = atol(optarg);
stackDepth = atol(optarg);
break;
default:
print_usage_and_exit(true);
@ -1162,6 +1243,9 @@ main(int argc, const char* const* argv)
if (optind >= argc)
print_usage_and_exit(true);
if (stackDepth != 0)
sOptions.stack_depth = stackDepth;
const char* const* programArgs = argv + optind;
int programArgCount = argc - optind;
@ -1245,6 +1329,7 @@ main(int argc, const char* const* argv)
thread->AddSamples(message.profiler_update.sample_count,
message.profiler_update.dropped_ticks,
message.profiler_update.stack_depth,
message.profiler_update.variable_stack_depth,
message.profiler_update.image_event);
if (message.profiler_update.stopped) {

View File

@ -1217,16 +1217,25 @@ profiling_do_sample(bool& flushBuffer)
// get the samples
addr_t* returnAddresses = debugInfo.profile.samples
+ debugInfo.profile.sample_count;
if (stackDepth > 1) {
int32 count = arch_debug_get_stack_trace(returnAddresses, stackDepth, 1,
0, false);
if (debugInfo.profile.variable_stack_depth) {
// variable sample count per hit
*returnAddresses = arch_debug_get_stack_trace(returnAddresses + 1,
stackDepth - 1, 1, 0, false);
for (int32 i = count; i < stackDepth; i++)
returnAddresses[i] = 0;
} else
*returnAddresses = (addr_t)arch_debug_get_interrupt_pc();
debugInfo.profile.sample_count += *returnAddresses + 1;
} else {
// fixed sample count per hit
if (stackDepth > 1) {
int32 count = arch_debug_get_stack_trace(returnAddresses,
stackDepth, 1, 0, false);
debugInfo.profile.sample_count += stackDepth;
for (int32 i = count; i < stackDepth; i++)
returnAddresses[i] = 0;
} else
*returnAddresses = (addr_t)arch_debug_get_interrupt_pc();
debugInfo.profile.sample_count += stackDepth;
}
return true;
}
@ -2214,6 +2223,8 @@ debug_nub_thread(void *)
replyPort = message.start_profiler.reply_port;
area_id sampleArea = message.start_profiler.sample_area;
int32 stackDepth = message.start_profiler.stack_depth;
bool variableStackDepth
= message.start_profiler.variable_stack_depth;
bigtime_t interval = max_c(message.start_profiler.interval,
B_DEBUG_MIN_PROFILE_INTERVAL);
status_t result = B_OK;
@ -2227,6 +2238,11 @@ debug_nub_thread(void *)
else if (stackDepth > B_DEBUG_STACK_TRACE_DEPTH)
stackDepth = B_DEBUG_STACK_TRACE_DEPTH;
// provision for an extra entry per hit (for the number of
// samples), if variable stack depth
if (variableStackDepth)
stackDepth++;
// clone the sample area
area_info areaInfo;
if (result == B_OK)
@ -2274,6 +2290,8 @@ debug_nub_thread(void *)
threadDebugInfo.profile.sample_count = 0;
threadDebugInfo.profile.dropped_ticks = 0;
threadDebugInfo.profile.stack_depth = stackDepth;
threadDebugInfo.profile.variable_stack_depth
= variableStackDepth;
threadDebugInfo.profile.buffer_full = false;
threadDebugInfo.profile.interval_left = interval;
threadDebugInfo.profile.installed_timer = NULL;