haiku/src/system/kernel/scheduler/scheduling_analysis.cpp

Commit 53892c92a0 by Ingo Weinhold, 2008-10-21 12:37:13 +00:00:
* Replaced scheduler_remove_from_run_queue() with
  scheduler_set_thread_priority(). Setting the thread priority was the
  only situation in which it was used.
* Renamed scheduler.cpp to scheduler_simple.cpp.
* The scheduler functions are no longer called directly. Instead there is
  now an operation vector, which is initialized at kernel init time.
  This allows picking the most suitable scheduler for the machine
  (e.g. a non-SMP scheduler on a non-SMP machine).

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@28262 a95241bf-73f2-0310-859d-f6bbb57e9c96
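
The operation vector mentioned above is essentially a struct of function
pointers that is filled in once at kernel init time. A minimal sketch of
the idea (member and variable names here are illustrative, not the
verbatim scheduler.h declarations):

// Sketch only: names are assumptions, not the actual Haiku interface.
struct scheduler_ops {
	void (*enqueue_in_run_queue)(struct thread* thread);
	void (*reschedule)(void);
	void (*set_thread_priority)(struct thread* thread, int32 priority);
	void (*start)(void);
};

extern struct scheduler_ops* gScheduler;

// At init time the most suitable implementation is picked, e.g. a
// non-SMP scheduler on a non-SMP machine:
//   gScheduler = smp_get_num_cpus() > 1
//       ? &kSMPScheduler : &kSimpleScheduler;
// Callers then go through the vector, e.g. gScheduler->reschedule(),
// instead of calling a scheduler function directly.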


/*
* Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#include <scheduling_analysis.h>
#include <elf.h>
#include <kernel.h>
#include <scheduler_defs.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/khash.h>
#include "scheduler_tracing.h"
#if SCHEDULER_TRACING
namespace SchedulingAnalysis {
using namespace SchedulerTracing;
#if SCHEDULING_ANALYSIS_TRACING
using namespace SchedulingAnalysisTracing;
#endif
struct ThreadWaitObject;
struct HashObjectKey {
virtual ~HashObjectKey()
{
}
virtual uint32 HashKey() const = 0;
};
struct HashObject {
HashObject* next;
virtual ~HashObject()
{
}
virtual uint32 HashKey() const = 0;
virtual bool Equals(const HashObjectKey* key) const = 0;
};
struct ThreadKey : HashObjectKey {
thread_id id;
ThreadKey(thread_id id)
:
id(id)
{
}
virtual uint32 HashKey() const
{
return id;
}
};
struct Thread : HashObject, scheduling_analysis_thread {
ScheduleState state;
bigtime_t lastTime;
ThreadWaitObject* waitObject;
Thread(thread_id id)
:
state(UNKNOWN),
lastTime(0),
waitObject(NULL)
{
this->id = id;
name[0] = '\0';
runs = 0;
total_run_time = 0;
min_run_time = -1;
max_run_time = -1;
latencies = 0;
total_latency = 0;
min_latency = -1;
max_latency = -1;
reruns = 0;
total_rerun_time = 0;
min_rerun_time = -1;
max_rerun_time = -1;
unspecified_wait_time = 0;
preemptions = 0;
wait_objects = NULL;
}
virtual uint32 HashKey() const
{
return id;
}
virtual bool Equals(const HashObjectKey* _key) const
{
const ThreadKey* key = dynamic_cast<const ThreadKey*>(_key);
if (key == NULL)
return false;
return key->id == id;
}
};
struct WaitObjectKey : HashObjectKey {
uint32 type;
void* object;
WaitObjectKey(uint32 type, void* object)
:
type(type),
object(object)
{
}
virtual uint32 HashKey() const
{
return type ^ (uint32)(addr_t)object;
}
};
struct WaitObject : HashObject, scheduling_analysis_wait_object {
WaitObject(uint32 type, void* object)
{
this->type = type;
this->object = object;
name[0] = '\0';
referenced_object = NULL;
}
virtual uint32 HashKey() const
{
return type ^ (uint32)(addr_t)object;
}
virtual bool Equals(const HashObjectKey* _key) const
{
const WaitObjectKey* key = dynamic_cast<const WaitObjectKey*>(_key);
if (key == NULL)
return false;
return key->type == type && key->object == object;
}
};
struct ThreadWaitObjectKey : HashObjectKey {
thread_id thread;
uint32 type;
void* object;
ThreadWaitObjectKey(thread_id thread, uint32 type, void* object)
:
thread(thread),
type(type),
object(object)
{
}
virtual uint32 HashKey() const
{
return thread ^ type ^ (uint32)(addr_t)object;
}
};
struct ThreadWaitObject : HashObject, scheduling_analysis_thread_wait_object {
ThreadWaitObject(thread_id thread, WaitObject* waitObject)
{
this->thread = thread;
wait_object = waitObject;
wait_time = 0;
waits = 0;
next_in_list = NULL;
}
virtual uint32 HashKey() const
{
return thread ^ wait_object->type ^ (uint32)(addr_t)wait_object->object;
}
virtual bool Equals(const HashObjectKey* _key) const
{
const ThreadWaitObjectKey* key
= dynamic_cast<const ThreadWaitObjectKey*>(_key);
if (key == NULL)
return false;
return key->thread == thread && key->type == wait_object->type
&& key->object == wait_object->object;
}
};
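// Manages the analysis: owns the flat output buffer handed in from
// userland. Objects are allocated from the front of the buffer, while
// the hash table indexing threads and wait objects sits at its back.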
class SchedulingAnalysisManager {
public:
SchedulingAnalysisManager(void* buffer, size_t size)
:
fBuffer(buffer),
fSize(size),
fHashTable(),
fHashTableSize(0)
{
fAnalysis.thread_count = 0;
fAnalysis.threads = 0;
fAnalysis.wait_object_count = 0;
fAnalysis.thread_wait_object_count = 0;
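// Partition the buffer: the hash table is placed at its end, sized
// pessimistically with one slot per object of the largest type that
// could be allocated from the remaining space.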
size_t maxObjectSize = max_c(max_c(sizeof(Thread), sizeof(WaitObject)),
sizeof(ThreadWaitObject));
fHashTableSize = size / (maxObjectSize + sizeof(HashObject*));
fHashTable = (HashObject**)((uint8*)fBuffer + fSize) - fHashTableSize;
fNextAllocation = (uint8*)fBuffer;
fRemainingBytes = (addr_t)fHashTable - (addr_t)fBuffer;
image_info info;
if (elf_get_image_info_for_address((addr_t)&scheduler_init, &info)
== B_OK) {
fKernelStart = (addr_t)info.text;
fKernelEnd = (addr_t)info.data + info.data_size;
} else {
fKernelStart = 0;
fKernelEnd = 0;
}
}
const scheduling_analysis* Analysis() const
{
return &fAnalysis;
}
void* Allocate(size_t size)
{
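// Simple bump allocator: requests are rounded up to 8-byte
// alignment and carved off the front of the buffer. Individual
// allocations are never freed.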
size = (size + 7) & ~(size_t)7;
if (size > fRemainingBytes)
return NULL;
void* address = fNextAllocation;
fNextAllocation += size;
fRemainingBytes -= size;
return address;
}
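// The hash table resolves collisions by chaining; new objects are
// simply prepended to their bucket.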
void Insert(HashObject* object)
{
uint32 index = object->HashKey() % fHashTableSize;
object->next = fHashTable[index];
fHashTable[index] = object;
}
void Remove(HashObject* object)
{
uint32 index = object->HashKey() % fHashTableSize;
HashObject** slot = &fHashTable[index];
while (*slot != object)
slot = &(*slot)->next;
*slot = object->next;
}
HashObject* Lookup(const HashObjectKey& key) const
{
uint32 index = key.HashKey() % fHashTableSize;
HashObject* object = fHashTable[index];
while (object != NULL && !object->Equals(&key))
object = object->next;
return object;
}
Thread* ThreadFor(thread_id id) const
{
return dynamic_cast<Thread*>(Lookup(ThreadKey(id)));
}
WaitObject* WaitObjectFor(uint32 type, void* object) const
{
return dynamic_cast<WaitObject*>(Lookup(WaitObjectKey(type, object)));
}
ThreadWaitObject* ThreadWaitObjectFor(thread_id thread, uint32 type,
void* object) const
{
return dynamic_cast<ThreadWaitObject*>(
Lookup(ThreadWaitObjectKey(thread, type, object)));
}
status_t AddThread(thread_id id, const char* name)
{
Thread* thread = ThreadFor(id);
if (thread == NULL) {
void* memory = Allocate(sizeof(Thread));
if (memory == NULL)
return B_NO_MEMORY;
thread = new(memory) Thread(id);
Insert(thread);
fAnalysis.thread_count++;
}
if (name != NULL && thread->name[0] == '\0')
strlcpy(thread->name, name, sizeof(thread->name));
return B_OK;
}
status_t AddWaitObject(uint32 type, void* object,
WaitObject** _waitObject = NULL)
{
if (WaitObjectFor(type, object) != NULL)
return B_OK;
void* memory = Allocate(sizeof(WaitObject));
if (memory == NULL)
return B_NO_MEMORY;
WaitObject* waitObject = new(memory) WaitObject(type, object);
Insert(waitObject);
fAnalysis.wait_object_count++;
// Set a dummy name for snooze() and waiting for signals, so we don't
// try to update them later on.
if (type == THREAD_BLOCK_TYPE_SNOOZE
|| type == THREAD_BLOCK_TYPE_SIGNAL) {
strcpy(waitObject->name, "?");
}
if (_waitObject != NULL)
*_waitObject = waitObject;
return B_OK;
}
status_t UpdateWaitObject(uint32 type, void* object, const char* name,
void* referencedObject)
{
WaitObject* waitObject = WaitObjectFor(type, object);
if (waitObject == NULL)
return B_OK;
if (waitObject->name[0] != '\0') {
// This is a new object at the same address. Replace the old one.
Remove(waitObject);
status_t error = AddWaitObject(type, object, &waitObject);
if (error != B_OK)
return error;
}
if (name == NULL)
name = "?";
strlcpy(waitObject->name, name, sizeof(waitObject->name));
waitObject->referenced_object = referencedObject;
return B_OK;
}
bool UpdateWaitObjectDontAdd(uint32 type, void* object, const char* name,
void* referencedObject)
{
WaitObject* waitObject = WaitObjectFor(type, object);
if (waitObject == NULL || waitObject->name[0] != '\0')
return false;
if (name == NULL)
name = "?";
strlcpy(waitObject->name, name, sizeof(waitObject->name));
waitObject->referenced_object = referencedObject;
return true;
}
status_t AddThreadWaitObject(Thread* thread, uint32 type, void* object)
{
WaitObject* waitObject = WaitObjectFor(type, object);
if (waitObject == NULL) {
// The algorithm should prevent this case.
return B_ERROR;
}
ThreadWaitObject* threadWaitObject = ThreadWaitObjectFor(thread->id,
type, object);
if (threadWaitObject == NULL
|| threadWaitObject->wait_object != waitObject) {
if (threadWaitObject != NULL)
Remove(threadWaitObject);
void* memory = Allocate(sizeof(ThreadWaitObject));
if (memory == NULL)
return B_NO_MEMORY;
threadWaitObject = new(memory) ThreadWaitObject(thread->id,
waitObject);
Insert(threadWaitObject);
fAnalysis.thread_wait_object_count++;
threadWaitObject->next_in_list = thread->wait_objects;
thread->wait_objects = threadWaitObject;
}
thread->waitObject = threadWaitObject;
return B_OK;
}
int32 MissingWaitObjects() const
{
// Iterate through the hash table and count the wait objects that don't
// have a name yet.
int32 count = 0;
for (uint32 i = 0; i < fHashTableSize; i++) {
HashObject* object = fHashTable[i];
while (object != NULL) {
WaitObject* waitObject = dynamic_cast<WaitObject*>(object);
if (waitObject != NULL && waitObject->name[0] == '\0')
count++;
object = object->next;
}
}
return count;
}
status_t FinishAnalysis()
{
// allocate the thread array
scheduling_analysis_thread** threads
= (scheduling_analysis_thread**)Allocate(
sizeof(Thread*) * fAnalysis.thread_count);
if (threads == NULL)
return B_NO_MEMORY;
// Iterate through the hash table and collect all threads. Also polish
// all wait objects that haven't been updated yet.
int32 index = 0;
for (uint32 i = 0; i < fHashTableSize; i++) {
HashObject* object = fHashTable[i];
while (object != NULL) {
Thread* thread = dynamic_cast<Thread*>(object);
if (thread != NULL) {
threads[index++] = thread;
} else if (WaitObject* waitObject
= dynamic_cast<WaitObject*>(object)) {
_PolishWaitObject(waitObject);
}
object = object->next;
}
}
fAnalysis.threads = threads;
dprintf("scheduling analysis: free bytes: %lu/%lu\n", fRemainingBytes, fSize);
return B_OK;
}
private:
void _PolishWaitObject(WaitObject* waitObject)
{
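// Try to determine a name for a wait object that no trace entry
// named. For locking primitives we peek at the live kernel object,
// which is only safe if it resides in the kernel image (i.e. is
// statically allocated and thus presumably still initialized).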
if (waitObject->name[0] != '\0')
return;
switch (waitObject->type) {
case THREAD_BLOCK_TYPE_SEMAPHORE:
{
sem_info info;
if (get_sem_info((sem_id)(addr_t)waitObject->object, &info)
== B_OK) {
strlcpy(waitObject->name, info.name,
sizeof(waitObject->name));
}
break;
}
case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
{
// If the condition variable object is in the kernel image,
// assume it is still initialized.
ConditionVariable* variable
= (ConditionVariable*)waitObject->object;
if (!_IsInKernelImage(variable))
break;
waitObject->referenced_object = (void*)variable->Object();
strlcpy(waitObject->name, variable->ObjectType(),
sizeof(waitObject->name));
break;
}
case THREAD_BLOCK_TYPE_MUTEX:
{
// If the mutex object is in the kernel image, assume it is
// still initialized.
mutex* lock = (mutex*)waitObject->object;
if (!_IsInKernelImage(lock))
break;
strlcpy(waitObject->name, lock->name, sizeof(waitObject->name));
break;
}
case THREAD_BLOCK_TYPE_RW_LOCK:
{
// If the rw_lock object is in the kernel image, assume it is
// still initialized.
rw_lock* lock = (rw_lock*)waitObject->object;
if (!_IsInKernelImage(lock))
break;
strlcpy(waitObject->name, lock->name, sizeof(waitObject->name));
break;
}
case THREAD_BLOCK_TYPE_OTHER:
{
const char* name = (const char*)waitObject->object;
if (name == NULL || _IsInKernelImage(name))
return;
strlcpy(waitObject->name, name, sizeof(waitObject->name));
break;
}
case THREAD_BLOCK_TYPE_SNOOZE:
case THREAD_BLOCK_TYPE_SIGNAL:
default:
break;
}
if (waitObject->name[0] != '\0')
return;
strcpy(waitObject->name, "?");
}
bool _IsInKernelImage(const void* _address)
{
addr_t address = (addr_t)_address;
return address >= fKernelStart && address < fKernelEnd;
}
private:
scheduling_analysis fAnalysis;
void* fBuffer;
size_t fSize;
HashObject** fHashTable;
uint32 fHashTableSize;
uint8* fNextAllocation;
size_t fRemainingBytes;
addr_t fKernelStart;
addr_t fKernelEnd;
};
static status_t
analyze_scheduling(bigtime_t from, bigtime_t until,
SchedulingAnalysisManager& manager)
{
// First pass (backward): find out how many threads and locking
// primitives we're talking about and register them with the manager.
TraceEntryIterator iterator;
iterator.MoveTo(INT_MAX);
while (TraceEntry* _entry = iterator.Previous()) {
SchedulerTraceEntry* baseEntry
= dynamic_cast<SchedulerTraceEntry*>(_entry);
if (baseEntry == NULL || baseEntry->Time() >= until)
continue;
if (baseEntry->Time() < from)
break;
status_t error = manager.AddThread(baseEntry->ThreadID(),
baseEntry->Name());
if (error != B_OK)
return error;
if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
error = manager.AddThread(entry->PreviousThreadID(), NULL);
if (error != B_OK)
return error;
if (entry->PreviousState() == B_THREAD_WAITING) {
void* waitObject = (void*)entry->PreviousWaitObject();
switch (entry->PreviousWaitObjectType()) {
case THREAD_BLOCK_TYPE_SNOOZE:
case THREAD_BLOCK_TYPE_SIGNAL:
waitObject = NULL;
break;
case THREAD_BLOCK_TYPE_SEMAPHORE:
case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
case THREAD_BLOCK_TYPE_MUTEX:
case THREAD_BLOCK_TYPE_RW_LOCK:
case THREAD_BLOCK_TYPE_OTHER:
default:
break;
}
error = manager.AddWaitObject(entry->PreviousWaitObjectType(),
waitObject);
if (error != B_OK)
return error;
}
}
}
#if SCHEDULING_ANALYSIS_TRACING
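// Remember where the forward pass starts, so we can iterate backward
// from here later, should any wait objects still lack names.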
int32 startEntryIndex = iterator.Index();
#endif
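// Second pass (forward): replay the scheduling events in the interval
// and accumulate run times, latencies, and wait times from the state
// transitions of each thread.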
while (TraceEntry* _entry = iterator.Next()) {
#if SCHEDULING_ANALYSIS_TRACING
// might be info on a wait object
if (WaitObjectTraceEntry* waitObjectEntry
= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
status_t error = manager.UpdateWaitObject(waitObjectEntry->Type(),
waitObjectEntry->Object(), waitObjectEntry->Name(),
waitObjectEntry->ReferencedObject());
if (error != B_OK)
return error;
continue;
}
#endif
SchedulerTraceEntry* baseEntry
= dynamic_cast<SchedulerTraceEntry*>(_entry);
if (baseEntry == NULL)
continue;
if (baseEntry->Time() >= until)
break;
if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
// scheduled thread
Thread* thread = manager.ThreadFor(entry->ThreadID());
bigtime_t diffTime = entry->Time() - thread->lastTime;
if (thread->state == READY) {
// thread scheduled after having been woken up
thread->latencies++;
thread->total_latency += diffTime;
if (thread->min_latency < 0 || diffTime < thread->min_latency)
thread->min_latency = diffTime;
if (diffTime > thread->max_latency)
thread->max_latency = diffTime;
} else if (thread->state == PREEMPTED) {
// thread scheduled after having been preempted before
thread->reruns++;
thread->total_rerun_time += diffTime;
if (thread->min_rerun_time < 0
|| diffTime < thread->min_rerun_time) {
thread->min_rerun_time = diffTime;
}
if (diffTime > thread->max_rerun_time)
thread->max_rerun_time = diffTime;
}
if (thread->state == STILL_RUNNING) {
// Thread was running and continues to run.
thread->state = RUNNING;
}
if (thread->state != RUNNING) {
thread->lastTime = entry->Time();
thread->state = RUNNING;
}
// unscheduled thread
if (entry->ThreadID() == entry->PreviousThreadID())
continue;
thread = manager.ThreadFor(entry->PreviousThreadID());
diffTime = entry->Time() - thread->lastTime;
if (thread->state == STILL_RUNNING) {
// thread preempted
thread->runs++;
thread->preemptions++;
thread->total_run_time += diffTime;
if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
thread->min_run_time = diffTime;
if (diffTime > thread->max_run_time)
thread->max_run_time = diffTime;
thread->lastTime = entry->Time();
thread->state = PREEMPTED;
} else if (thread->state == RUNNING) {
// thread starts waiting (it hadn't been added to the run
// queue before being unscheduled)
thread->runs++;
thread->total_run_time += diffTime;
if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
thread->min_run_time = diffTime;
if (diffTime > thread->max_run_time)
thread->max_run_time = diffTime;
if (entry->PreviousState() == B_THREAD_WAITING) {
void* waitObject = (void*)entry->PreviousWaitObject();
switch (entry->PreviousWaitObjectType()) {
case THREAD_BLOCK_TYPE_SNOOZE:
case THREAD_BLOCK_TYPE_SIGNAL:
waitObject = NULL;
break;
case THREAD_BLOCK_TYPE_SEMAPHORE:
case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
case THREAD_BLOCK_TYPE_MUTEX:
case THREAD_BLOCK_TYPE_RW_LOCK:
case THREAD_BLOCK_TYPE_OTHER:
default:
break;
}
status_t error = manager.AddThreadWaitObject(thread,
entry->PreviousWaitObjectType(), waitObject);
if (error != B_OK)
return error;
}
thread->lastTime = entry->Time();
thread->state = WAITING;
} else if (thread->state == UNKNOWN) {
uint32 threadState = entry->PreviousState();
if (threadState == B_THREAD_WAITING
|| threadState == B_THREAD_SUSPENDED) {
thread->lastTime = entry->Time();
thread->state = WAITING;
} else if (threadState == B_THREAD_READY) {
thread->lastTime = entry->Time();
thread->state = PREEMPTED;
}
}
} else if (EnqueueThread* entry
= dynamic_cast<EnqueueThread*>(_entry)) {
// thread enqueued in run queue
Thread* thread = manager.ThreadFor(entry->ThreadID());
if (thread->state == RUNNING || thread->state == STILL_RUNNING) {
// Thread was running and is re-entered into the run queue. This
// is done by the scheduler if the thread remains ready.
thread->state = STILL_RUNNING;
} else {
// Thread was waiting and is ready now.
bigtime_t diffTime = entry->Time() - thread->lastTime;
if (thread->waitObject != NULL) {
thread->waitObject->wait_time += diffTime;
thread->waitObject->waits++;
thread->waitObject = NULL;
} else if (thread->state != UNKNOWN)
thread->unspecified_wait_time += diffTime;
thread->lastTime = entry->Time();
thread->state = READY;
}
} else if (RemoveThread* entry = dynamic_cast<RemoveThread*>(_entry)) {
// thread removed from run queue
Thread* thread = manager.ThreadFor(entry->ThreadID());
// This really only happens when the thread priority is changed
// while the thread is ready.
bigtime_t diffTime = entry->Time() - thread->lastTime;
if (thread->state == RUNNING) {
// This should never happen.
thread->runs++;
thread->total_run_time += diffTime;
if (thread->min_run_time < 0 || diffTime < thread->min_run_time)
thread->min_run_time = diffTime;
if (diffTime > thread->max_run_time)
thread->max_run_time = diffTime;
} else if (thread->state == READY || thread->state == PREEMPTED) {
// Not really correct, but the case is rare and we keep it
// simple.
thread->unspecified_wait_time += diffTime;
}
thread->lastTime = entry->Time();
thread->state = WAITING;
}
}
#if SCHEDULING_ANALYSIS_TRACING
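// Some wait objects may still be unnamed, if they were created before
// the analysis interval. Iterate backward from the start of the
// interval to find their creation/initialization entries.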
int32 missingWaitObjects = manager.MissingWaitObjects();
if (missingWaitObjects > 0) {
iterator.MoveTo(startEntryIndex + 1);
while (TraceEntry* _entry = iterator.Previous()) {
if (WaitObjectTraceEntry* waitObjectEntry
= dynamic_cast<WaitObjectTraceEntry*>(_entry)) {
if (manager.UpdateWaitObjectDontAdd(
waitObjectEntry->Type(), waitObjectEntry->Object(),
waitObjectEntry->Name(),
waitObjectEntry->ReferencedObject())) {
if (--missingWaitObjects == 0)
break;
}
}
}
}
#endif
return B_OK;
}
} // namespace SchedulingAnalysis
#endif // SCHEDULER_TRACING
status_t
_user_analyze_scheduling(bigtime_t from, bigtime_t until, void* buffer,
size_t size, scheduling_analysis* analysis)
{
#if SCHEDULER_TRACING
using namespace SchedulingAnalysis;
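// Align the buffer to 8 bytes and trim the size to a multiple of 8,
// matching the granularity of SchedulingAnalysisManager::Allocate().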
if ((addr_t)buffer & 0x7) {
addr_t diff = (addr_t)buffer & 0x7;
buffer = (void*)((addr_t)buffer + 8 - diff);
size -= 8 - diff;
}
size &= ~(size_t)0x7;
if (buffer == NULL || !IS_USER_ADDRESS(buffer) || size == 0)
return B_BAD_VALUE;
status_t error = lock_memory(buffer, size, B_READ_DEVICE);
if (error != B_OK)
return error;
SchedulingAnalysisManager manager(buffer, size);
InterruptsLocker locker;
lock_tracing_buffer();
error = analyze_scheduling(from, until, manager);
unlock_tracing_buffer();
locker.Unlock();
if (error == B_OK)
error = manager.FinishAnalysis();
unlock_memory(buffer, size, B_READ_DEVICE);
if (error == B_OK) {
error = user_memcpy(analysis, manager.Analysis(),
sizeof(scheduling_analysis));
}
return error;
#else
return B_BAD_VALUE;
#endif
}
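
For illustration, a minimal sketch of how a userland client might drive
this syscall, assuming it is exposed as _kern_analyze_scheduling (the
wrapper name and the buffer size are assumptions, not taken from this
file; error handling is reduced to the bare minimum):

// Sketch only: assumes a SCHEDULER_TRACING kernel and the
// _kern_analyze_scheduling syscall wrapper from syscalls.h.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <OS.h>
#include <scheduling_analysis.h>
#include <syscalls.h>

int
main()
{
	const size_t kBufferSize = 8 * 1024 * 1024;
	void* buffer = malloc(kBufferSize);
	if (buffer == NULL)
		return 1;

	// Let the system run (and trace) for a second, then analyze exactly
	// that window of the tracing buffer.
	bigtime_t from = system_time();
	snooze(1000000);
	bigtime_t until = system_time();

	scheduling_analysis analysis;
	status_t error = _kern_analyze_scheduling(from, until, buffer,
		kBufferSize, &analysis);
	if (error != B_OK) {
		fprintf(stderr, "analysis failed: %s\n", strerror(error));
		return 1;
	}

	printf("%lu threads, %lu wait objects, %lu thread wait objects\n",
		(unsigned long)analysis.thread_count,
		(unsigned long)analysis.wait_object_count,
		(unsigned long)analysis.thread_wait_object_count);
	free(buffer);
	return 0;
}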