* Increased the maximum profiling caller stack depth significantly.
* Extended the profiling API by an option to record a variable number of samples per tick. The stack depth is used as a maximum. * Added new option "-f" to the "profile" tool. When specified it increments the hit counts of all symbols in the full available caller stack. I.e. the resulting hit counts will approximate the total time spent in each function or any function directly or indirectly called by it. Thus "_start" and "main" will usually get 100% and leaf functions only what time has actually been spent in them. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@27699 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
114c07e6e2
commit
0135e2e324
@ -367,6 +367,9 @@ typedef struct {
|
||||
area_id sample_area; // area into which the sample will be
|
||||
// written
|
||||
int32 stack_depth; // number of return address per hit
|
||||
bool variable_stack_depth;
|
||||
// variable number of samples per hit;
|
||||
// cf. debug_profiler_update
|
||||
} debug_nub_start_profiler;
|
||||
|
||||
typedef struct {
|
||||
@ -549,18 +552,22 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
debug_origin origin;
|
||||
int32 image_event; // number of the last image event;
|
||||
// all samples were recorded after
|
||||
// this event and before the next
|
||||
// one
|
||||
int32 stack_depth; // number of return addresses per
|
||||
// tick
|
||||
int32 sample_count; // number of samples in the buffer
|
||||
int32 dropped_ticks; // number of ticks that had been
|
||||
// dropped, since the buffer was
|
||||
// full
|
||||
bool stopped; // if true, the thread is no longer
|
||||
// being profiled
|
||||
int32 image_event; // number of the last image event; all
|
||||
// samples were recorded after this
|
||||
// event and before the next one
|
||||
int32 stack_depth; // number of return addresses per tick
|
||||
int32 sample_count; // number of samples in the buffer
|
||||
int32 dropped_ticks; // number of ticks that had been
|
||||
// dropped, since the buffer was full
|
||||
bool variable_stack_depth;
|
||||
// the number of samples per hit is
|
||||
// variable, i.e. the format for the
|
||||
// samples of a hit in the buffer is
|
||||
// <n> <sample 1> ... <sample n>
|
||||
// instead of
|
||||
// <sample 1> ... <sample stack_depth>
|
||||
bool stopped; // if true, the thread is no longer
|
||||
// being profiled
|
||||
} debug_profiler_update;
|
||||
|
||||
// B_DEBUGGER_MESSAGE_HANDED_OVER
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
// limits
|
||||
#define B_DEBUG_MIN_PROFILE_INTERVAL 10 /* in us */
|
||||
#define B_DEBUG_STACK_TRACE_DEPTH 20
|
||||
#define B_DEBUG_STACK_TRACE_DEPTH 128
|
||||
#define B_DEBUG_PROFILE_BUFFER_FLUSH_THRESHOLD 70 /* in % */
|
||||
|
||||
|
||||
@ -96,6 +96,8 @@ struct thread_debug_info {
|
||||
int32 image_event;
|
||||
// number of the image event when the first sample was written into
|
||||
// the buffer
|
||||
bool variable_stack_depth;
|
||||
// record a variable number of samples per hit
|
||||
bool buffer_full;
|
||||
// indicates that the sample buffer is full
|
||||
union {
|
||||
|
@ -51,6 +51,10 @@ static const char* kUsage =
|
||||
" thread.\n"
|
||||
" -C - Don't profile child teams. Default is to recursively\n"
|
||||
" profile all teams created by a profiled team.\n"
|
||||
" -f - Always analyze the full caller stack. The hit count\n"
|
||||
" for every encountered function will be incremented.\n"
|
||||
" This increases the default for the caller stack depth\n"
|
||||
" (\"-s\") to 64.\n"
|
||||
" -h, --help - Print this usage info.\n"
|
||||
" -i <interval> - Use a tick interval of <interval> microseconds.\n"
|
||||
" Default is 1000 (1 ms). On a fast machine, a shorter\n"
|
||||
@ -75,7 +79,8 @@ struct Options {
|
||||
profile_kernel(true),
|
||||
profile_loading(false),
|
||||
profile_teams(true),
|
||||
profile_threads(true)
|
||||
profile_threads(true),
|
||||
analyze_full_stack(false)
|
||||
{
|
||||
}
|
||||
|
||||
@ -86,6 +91,7 @@ struct Options {
|
||||
bool profile_loading;
|
||||
bool profile_teams;
|
||||
bool profile_threads;
|
||||
bool analyze_full_stack;
|
||||
};
|
||||
|
||||
static Options sOptions;
|
||||
@ -335,6 +341,16 @@ public:
|
||||
fTotalHits++;
|
||||
}
|
||||
|
||||
void AddSymbolHit(int32 symbolIndex)
|
||||
{
|
||||
fSymbolHits[symbolIndex]++;
|
||||
}
|
||||
|
||||
void AddImageHit()
|
||||
{
|
||||
fTotalHits++;
|
||||
}
|
||||
|
||||
Image* GetImage() const
|
||||
{
|
||||
return fImage;
|
||||
@ -376,7 +392,8 @@ public:
|
||||
fTotalTicks(0),
|
||||
fUnkownTicks(0),
|
||||
fDroppedTicks(0),
|
||||
fInterval(1)
|
||||
fInterval(1),
|
||||
fTotalSampleCount(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -456,7 +473,8 @@ public:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void AddSamples(int32 count, int32 dropped, int32 stackDepth, int32 event)
|
||||
void AddSamples(int32 count, int32 dropped, int32 stackDepth,
|
||||
bool variableStackDepth, int32 event)
|
||||
{
|
||||
_RemoveObsoleteImages(event);
|
||||
|
||||
@ -471,24 +489,76 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
count = count / stackDepth * stackDepth;
|
||||
if (variableStackDepth) {
|
||||
// Variable stack depth means we have full caller stack analyzes
|
||||
// enabled.
|
||||
int32 totalSampleCount = 0;
|
||||
int32 tickCount = 0;
|
||||
addr_t* samples = fSamples;
|
||||
while (count > 0) {
|
||||
int32 sampleCount = *(samples++);
|
||||
|
||||
for (int32 i = 0; i < count; i += stackDepth) {
|
||||
ThreadImage* image = NULL;
|
||||
for (int32 k = 0; k < stackDepth; k++) {
|
||||
addr_t address = fSamples[i + k];
|
||||
image = FindImage(address);
|
||||
if (image != NULL) {
|
||||
image->AddHit(address);
|
||||
break;
|
||||
// Sort the samples. This way hits of the same symbol are
|
||||
// successive and we can avoid incrementing the hit count of the
|
||||
// same symbol twice. Same for images.
|
||||
std::sort(samples, samples + sampleCount);
|
||||
|
||||
int32 unknownSamples = 0;
|
||||
ThreadImage* previousImage = NULL;
|
||||
int32 previousSymbol = -1;
|
||||
|
||||
for (int32 i = 0; i < sampleCount; i++) {
|
||||
addr_t address = samples[i];
|
||||
ThreadImage* image = FindImage(address);
|
||||
int32 symbol = -1;
|
||||
if (image != NULL) {
|
||||
symbol = image->GetImage()->FindSymbol(address);
|
||||
if (symbol < 0) {
|
||||
// TODO: Count unknown image hits?
|
||||
} else if (symbol != previousSymbol)
|
||||
image->AddSymbolHit(symbol);
|
||||
|
||||
if (image != previousImage)
|
||||
image->AddImageHit();
|
||||
} else
|
||||
unknownSamples++;
|
||||
|
||||
previousImage = image;
|
||||
previousSymbol = symbol;
|
||||
}
|
||||
|
||||
if (unknownSamples == sampleCount)
|
||||
fUnkownTicks++;
|
||||
|
||||
samples += sampleCount;
|
||||
count -= sampleCount + 1;
|
||||
tickCount++;
|
||||
totalSampleCount += sampleCount;
|
||||
}
|
||||
|
||||
if (image == NULL)
|
||||
fUnkownTicks++;
|
||||
fTotalTicks += tickCount;
|
||||
fTotalSampleCount += totalSampleCount;
|
||||
} else {
|
||||
count = count / stackDepth * stackDepth;
|
||||
|
||||
for (int32 i = 0; i < count; i += stackDepth) {
|
||||
ThreadImage* image = NULL;
|
||||
for (int32 k = 0; k < stackDepth; k++) {
|
||||
addr_t address = fSamples[i + k];
|
||||
image = FindImage(address);
|
||||
if (image != NULL) {
|
||||
image->AddHit(address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (image == NULL)
|
||||
fUnkownTicks++;
|
||||
}
|
||||
|
||||
fTotalTicks += count / stackDepth;
|
||||
}
|
||||
|
||||
fTotalTicks += count / stackDepth;
|
||||
fDroppedTicks += dropped;
|
||||
|
||||
// re-add the new images
|
||||
@ -573,6 +643,10 @@ public:
|
||||
fprintf(sOptions.output, " dropped ticks: %lld (%lld us, %6.2f%%)\n",
|
||||
fDroppedTicks, fDroppedTicks * fInterval,
|
||||
100.0 * fDroppedTicks / totalTicks);
|
||||
if (sOptions.analyze_full_stack) {
|
||||
fprintf(sOptions.output, " samples/tick: %.1f\n",
|
||||
(double)fTotalSampleCount / totalTicks);
|
||||
}
|
||||
|
||||
if (imageCount > 0) {
|
||||
fprintf(sOptions.output, "\n");
|
||||
@ -649,6 +723,7 @@ private:
|
||||
int64 fUnkownTicks;
|
||||
int64 fDroppedTicks;
|
||||
bigtime_t fInterval;
|
||||
int64 fTotalSampleCount;
|
||||
};
|
||||
|
||||
|
||||
@ -786,6 +861,7 @@ public:
|
||||
message.interval = sOptions.interval;
|
||||
message.sample_area = sampleArea;
|
||||
message.stack_depth = sOptions.stack_depth;
|
||||
message.variable_stack_depth = sOptions.analyze_full_stack;
|
||||
|
||||
debug_nub_start_profiler_reply reply;
|
||||
status_t error = send_debug_message(&fDebugContext,
|
||||
@ -1114,6 +1190,7 @@ get_id(const char *str, int32 &id)
|
||||
int
|
||||
main(int argc, const char* const* argv)
|
||||
{
|
||||
int32 stackDepth = 0;
|
||||
const char* outputFile = NULL;
|
||||
|
||||
while (true) {
|
||||
@ -1123,7 +1200,7 @@ main(int argc, const char* const* argv)
|
||||
};
|
||||
|
||||
opterr = 0; // don't print errors
|
||||
int c = getopt_long(argc, (char**)argv, "+cChi:klo:s:", sLongOptions,
|
||||
int c = getopt_long(argc, (char**)argv, "+cCfhi:klo:s:", sLongOptions,
|
||||
NULL);
|
||||
if (c == -1)
|
||||
break;
|
||||
@ -1135,6 +1212,10 @@ main(int argc, const char* const* argv)
|
||||
case 'C':
|
||||
sOptions.profile_teams = false;
|
||||
break;
|
||||
case 'f':
|
||||
sOptions.stack_depth = 64;
|
||||
sOptions.analyze_full_stack = true;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage_and_exit(false);
|
||||
break;
|
||||
@ -1151,7 +1232,7 @@ main(int argc, const char* const* argv)
|
||||
outputFile = optarg;
|
||||
break;
|
||||
case 's':
|
||||
sOptions.stack_depth = atol(optarg);
|
||||
stackDepth = atol(optarg);
|
||||
break;
|
||||
default:
|
||||
print_usage_and_exit(true);
|
||||
@ -1162,6 +1243,9 @@ main(int argc, const char* const* argv)
|
||||
if (optind >= argc)
|
||||
print_usage_and_exit(true);
|
||||
|
||||
if (stackDepth != 0)
|
||||
sOptions.stack_depth = stackDepth;
|
||||
|
||||
const char* const* programArgs = argv + optind;
|
||||
int programArgCount = argc - optind;
|
||||
|
||||
@ -1245,6 +1329,7 @@ main(int argc, const char* const* argv)
|
||||
thread->AddSamples(message.profiler_update.sample_count,
|
||||
message.profiler_update.dropped_ticks,
|
||||
message.profiler_update.stack_depth,
|
||||
message.profiler_update.variable_stack_depth,
|
||||
message.profiler_update.image_event);
|
||||
|
||||
if (message.profiler_update.stopped) {
|
||||
|
@ -1217,16 +1217,25 @@ profiling_do_sample(bool& flushBuffer)
|
||||
// get the samples
|
||||
addr_t* returnAddresses = debugInfo.profile.samples
|
||||
+ debugInfo.profile.sample_count;
|
||||
if (stackDepth > 1) {
|
||||
int32 count = arch_debug_get_stack_trace(returnAddresses, stackDepth, 1,
|
||||
0, false);
|
||||
if (debugInfo.profile.variable_stack_depth) {
|
||||
// variable sample count per hit
|
||||
*returnAddresses = arch_debug_get_stack_trace(returnAddresses + 1,
|
||||
stackDepth - 1, 1, 0, false);
|
||||
|
||||
for (int32 i = count; i < stackDepth; i++)
|
||||
returnAddresses[i] = 0;
|
||||
} else
|
||||
*returnAddresses = (addr_t)arch_debug_get_interrupt_pc();
|
||||
debugInfo.profile.sample_count += *returnAddresses + 1;
|
||||
} else {
|
||||
// fixed sample count per hit
|
||||
if (stackDepth > 1) {
|
||||
int32 count = arch_debug_get_stack_trace(returnAddresses,
|
||||
stackDepth, 1, 0, false);
|
||||
|
||||
debugInfo.profile.sample_count += stackDepth;
|
||||
for (int32 i = count; i < stackDepth; i++)
|
||||
returnAddresses[i] = 0;
|
||||
} else
|
||||
*returnAddresses = (addr_t)arch_debug_get_interrupt_pc();
|
||||
|
||||
debugInfo.profile.sample_count += stackDepth;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -2214,6 +2223,8 @@ debug_nub_thread(void *)
|
||||
replyPort = message.start_profiler.reply_port;
|
||||
area_id sampleArea = message.start_profiler.sample_area;
|
||||
int32 stackDepth = message.start_profiler.stack_depth;
|
||||
bool variableStackDepth
|
||||
= message.start_profiler.variable_stack_depth;
|
||||
bigtime_t interval = max_c(message.start_profiler.interval,
|
||||
B_DEBUG_MIN_PROFILE_INTERVAL);
|
||||
status_t result = B_OK;
|
||||
@ -2227,6 +2238,11 @@ debug_nub_thread(void *)
|
||||
else if (stackDepth > B_DEBUG_STACK_TRACE_DEPTH)
|
||||
stackDepth = B_DEBUG_STACK_TRACE_DEPTH;
|
||||
|
||||
// provision for an extra entry per hit (for the number of
|
||||
// samples), if variable stack depth
|
||||
if (variableStackDepth)
|
||||
stackDepth++;
|
||||
|
||||
// clone the sample area
|
||||
area_info areaInfo;
|
||||
if (result == B_OK)
|
||||
@ -2274,6 +2290,8 @@ debug_nub_thread(void *)
|
||||
threadDebugInfo.profile.sample_count = 0;
|
||||
threadDebugInfo.profile.dropped_ticks = 0;
|
||||
threadDebugInfo.profile.stack_depth = stackDepth;
|
||||
threadDebugInfo.profile.variable_stack_depth
|
||||
= variableStackDepth;
|
||||
threadDebugInfo.profile.buffer_full = false;
|
||||
threadDebugInfo.profile.interval_left = interval;
|
||||
threadDebugInfo.profile.installed_timer = NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user