diff --git a/src/system/kernel/Jamfile b/src/system/kernel/Jamfile index c84c61e277..b9f373d323 100644 --- a/src/system/kernel/Jamfile +++ b/src/system/kernel/Jamfile @@ -10,6 +10,8 @@ SubDir HAIKU_TOP src system kernel ; SubDirC++Flags $(defines) ; } +SEARCH_SOURCE += [ FDirName $(SUBDIR) scheduler ] ; + UsePrivateHeaders libroot ; UsePrivateHeaders shared ; UsePrivateHeaders runtime_loader ; @@ -35,7 +37,6 @@ KernelMergeObject kernel_core.o : Notifications.cpp port.cpp real_time_clock.c - scheduler.cpp sem.cpp shutdown.c signal.cpp @@ -48,6 +49,11 @@ KernelMergeObject kernel_core.o : usergroup.cpp wait_for_objects.cpp + # scheduler + scheduler.cpp + scheduler_tracing.cpp + scheduling_analysis.cpp + : $(TARGET_KERNEL_PIC_CCFLAGS) ; diff --git a/src/system/kernel/scheduler/scheduler.cpp b/src/system/kernel/scheduler/scheduler.cpp new file mode 100644 index 0000000000..67ef33c8c5 --- /dev/null +++ b/src/system/kernel/scheduler/scheduler.cpp @@ -0,0 +1,393 @@ +/* + * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. + * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. + * Copyright 2002, Angelo Mottola, a.mottola@libero.it. + * Distributed under the terms of the MIT License. + * + * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. + * Distributed under the terms of the NewOS License. + */ + +/*! The thread scheduler */ + + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "scheduler_tracing.h" + + +//#define TRACE_SCHEDULER +#ifdef TRACE_SCHEDULER +# define TRACE(x) dprintf x +#else +# define TRACE(x) ; +#endif + + +// The run queue. Holds the threads ready to run ordered by priority. +static struct thread *sRunQueue = NULL; +static cpu_mask_t sIdleCPUs = 0; + + +static int +_rand(void) +{ + static int next = 0; + + if (next == 0) + next = system_time(); + + next = next * 1103515245 + 12345; + return (next >> 16) & 0x7FFF; +} + + +static int +dump_run_queue(int argc, char **argv) +{ + struct thread *thread; + + thread = sRunQueue; + if (!thread) + kprintf("Run queue is empty!\n"); + else { + kprintf("thread id priority name\n"); + while (thread) { + kprintf("%p %-7ld %-8ld %s\n", thread, thread->id, + thread->priority, thread->name); + thread = thread->queue_next; + } + } + + return 0; +} + + +/*! Enqueues the thread into the run queue. + Note: thread lock must be held when entering this function +*/ +void +scheduler_enqueue_in_run_queue(struct thread *thread) +{ + if (thread->state == B_THREAD_RUNNING) { + // The thread is currently running (on another CPU) and we cannot + // insert it into the run queue. Set the next state to ready so the + // thread is inserted into the run queue on the next reschedule. 
+ thread->next_state = B_THREAD_READY; + return; + } + + thread->state = thread->next_state = B_THREAD_READY; + + struct thread *curr, *prev; + for (curr = sRunQueue, prev = NULL; curr + && curr->priority >= thread->next_priority; + curr = curr->queue_next) { + if (prev) + prev = prev->queue_next; + else + prev = sRunQueue; + } + + T(EnqueueThread(thread, prev, curr)); + + thread->queue_next = curr; + if (prev) + prev->queue_next = thread; + else + sRunQueue = thread; + + thread->next_priority = thread->priority; + + if (thread->priority != B_IDLE_PRIORITY) { + int32 currentCPU = smp_get_current_cpu(); + if (sIdleCPUs != 0) { + if (thread->pinned_to_cpu > 0) { + // thread is pinned to a CPU -- notify it, if it is idle + int32 targetCPU = thread->previous_cpu->cpu_num; + if ((sIdleCPUs & (1 << targetCPU)) != 0) { + sIdleCPUs &= ~(1 << targetCPU); + smp_send_ici(targetCPU, SMP_MSG_RESCHEDULE_IF_IDLE, 0, 0, + 0, NULL, SMP_MSG_FLAG_ASYNC); + } + } else { + // Thread is not pinned to any CPU -- take it ourselves, if we + // are idle, otherwise notify the next idle CPU. In either case + // we clear the idle bit of the chosen CPU, so that the + // scheduler_enqueue_in_run_queue() won't try to bother the + // same CPU again, if invoked before it handled the interrupt. + cpu_mask_t idleCPUs = CLEAR_BIT(sIdleCPUs, currentCPU); + if ((sIdleCPUs & (1 << currentCPU)) != 0) { + sIdleCPUs = idleCPUs; + } else { + int32 targetCPU = 0; + for (; targetCPU < B_MAX_CPU_COUNT; targetCPU++) { + cpu_mask_t mask = 1 << targetCPU; + if ((idleCPUs & mask) != 0) { + sIdleCPUs &= ~mask; + break; + } + } + + smp_send_ici(targetCPU, SMP_MSG_RESCHEDULE_IF_IDLE, 0, 0, + 0, NULL, SMP_MSG_FLAG_ASYNC); + } + } + } + } +} + + +/*! Removes a thread from the run queue. + Note: thread lock must be held when entering this function +*/ +void +scheduler_remove_from_run_queue(struct thread *thread) +{ + struct thread *item, *prev; + + T(RemoveThread(thread)); + + // find thread in run queue + for (item = sRunQueue, prev = NULL; item && item != thread; + item = item->queue_next) { + if (prev) + prev = prev->queue_next; + else + prev = sRunQueue; + } + + ASSERT(item == thread); + + if (prev) + prev->queue_next = item->queue_next; + else + sRunQueue = item->queue_next; +} + + +static void +context_switch(struct thread *fromThread, struct thread *toThread) +{ + if ((fromThread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0) + user_debug_thread_unscheduled(fromThread); + + toThread->previous_cpu = toThread->cpu = fromThread->cpu; + fromThread->cpu = NULL; + + arch_thread_set_current_thread(toThread); + arch_thread_context_switch(fromThread, toThread); + + // Looks weird, but is correct. fromThread had been unscheduled earlier, + // but is back now. The notification for a thread scheduled the first time + // happens in thread.cpp:thread_kthread_entry(). + if ((fromThread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0) + user_debug_thread_scheduled(fromThread); +} + + +static int32 +reschedule_event(timer *unused) +{ + if (thread_get_current_thread()->keep_scheduled > 0) + return B_HANDLED_INTERRUPT; + + // this function is called as a result of the timer event set by the + // scheduler returning this causes a reschedule on the timer event + thread_get_current_thread()->cpu->preempted = 1; + return B_INVOKE_SCHEDULER; +} + + +/*! Runs the scheduler. 
+ Note: expects thread spinlock to be held +*/ +void +scheduler_reschedule(void) +{ + struct thread *oldThread = thread_get_current_thread(); + struct thread *nextThread, *prevThread; + + TRACE(("reschedule(): cpu %d, cur_thread = %ld\n", smp_get_current_cpu(), thread_get_current_thread()->id)); + + oldThread->cpu->invoke_scheduler = false; + + oldThread->state = oldThread->next_state; + switch (oldThread->next_state) { + case B_THREAD_RUNNING: + case B_THREAD_READY: + TRACE(("enqueueing thread %ld into run q. pri = %ld\n", oldThread->id, oldThread->priority)); + scheduler_enqueue_in_run_queue(oldThread); + break; + case B_THREAD_SUSPENDED: + TRACE(("reschedule(): suspending thread %ld\n", oldThread->id)); + break; + case THREAD_STATE_FREE_ON_RESCHED: + break; + default: + TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n", oldThread->id, oldThread->next_state)); + break; + } + + nextThread = sRunQueue; + prevThread = NULL; + + if (oldThread->cpu->disabled) { + // CPU is disabled - just select an idle thread + while (nextThread && nextThread->priority > B_IDLE_PRIORITY) { + prevThread = nextThread; + nextThread = nextThread->queue_next; + } + } else { + while (nextThread) { + // select next thread from the run queue + while (nextThread && nextThread->priority > B_IDLE_PRIORITY) { +#if 0 + if (oldThread == nextThread && nextThread->was_yielded) { + // ignore threads that called thread_yield() once + nextThread->was_yielded = false; + prevThread = nextThread; + nextThread = nextThread->queue_next; + } +#endif + + // skip thread, if it doesn't want to run on this CPU + if (nextThread->pinned_to_cpu > 0 + && nextThread->previous_cpu != oldThread->cpu) { + prevThread = nextThread; + nextThread = nextThread->queue_next; + continue; + } + + // always extract real time threads + if (nextThread->priority >= B_FIRST_REAL_TIME_PRIORITY) + break; + + // never skip last non-idle normal thread + if (nextThread->queue_next && nextThread->queue_next->priority == B_IDLE_PRIORITY) + break; + + // skip normal threads sometimes (roughly 20%) + if (_rand() > 0x1a00) + break; + + // skip until next lower priority + int32 priority = nextThread->priority; + do { + prevThread = nextThread; + nextThread = nextThread->queue_next; + } while (nextThread->queue_next != NULL + && priority == nextThread->queue_next->priority + && nextThread->queue_next->priority > B_IDLE_PRIORITY); + } + + if (nextThread->cpu + && nextThread->cpu->cpu_num != oldThread->cpu->cpu_num) { + panic("thread in run queue that's still running on another CPU!\n"); + // ToDo: remove this check completely when we're sure that this + // cannot happen anymore. 
+ prevThread = nextThread; + nextThread = nextThread->queue_next; + continue; + } + + break; + } + } + + if (!nextThread) + panic("reschedule(): run queue is empty!\n"); + + // extract selected thread from the run queue + if (prevThread) + prevThread->queue_next = nextThread->queue_next; + else + sRunQueue = nextThread->queue_next; + + T(ScheduleThread(nextThread, oldThread)); + + nextThread->state = B_THREAD_RUNNING; + nextThread->next_state = B_THREAD_READY; + oldThread->was_yielded = false; + + // track kernel time (user time is tracked in thread_at_kernel_entry()) + bigtime_t now = system_time(); + oldThread->kernel_time += now - oldThread->last_time; + nextThread->last_time = now; + + // track CPU activity + if (!thread_is_idle_thread(oldThread)) { + oldThread->cpu->active_time += + (oldThread->kernel_time - oldThread->cpu->last_kernel_time) + + (oldThread->user_time - oldThread->cpu->last_user_time); + } + + if (!thread_is_idle_thread(nextThread)) { + oldThread->cpu->last_kernel_time = nextThread->kernel_time; + oldThread->cpu->last_user_time = nextThread->user_time; + } + + if (nextThread != oldThread || oldThread->cpu->preempted) { + bigtime_t quantum = 3000; // ToDo: calculate quantum! + timer *quantumTimer = &oldThread->cpu->quantum_timer; + + if (!oldThread->cpu->preempted) + cancel_timer(quantumTimer); + + oldThread->cpu->preempted = 0; + add_timer(quantumTimer, &reschedule_event, quantum, + B_ONE_SHOT_RELATIVE_TIMER | B_TIMER_ACQUIRE_THREAD_LOCK); + + // update the idle bit for this CPU in the CPU mask + int32 cpuNum = smp_get_current_cpu(); + if (nextThread->priority == B_IDLE_PRIORITY) + sIdleCPUs = SET_BIT(sIdleCPUs, cpuNum); + else + sIdleCPUs = CLEAR_BIT(sIdleCPUs, cpuNum); + + if (nextThread != oldThread) + context_switch(oldThread, nextThread); + } +} + + +void +scheduler_init(void) +{ + add_debugger_command_etc("run_queue", &dump_run_queue, + "List threads in run queue", "\nLists threads in run queue", 0); + +#if SCHEDULER_TRACING + add_debugger_command_etc("scheduler", &cmd_scheduler, + "Analyze scheduler tracing information", + "\n" + "Analyzes scheduler tracing information for a given thread.\n" + " - ID of the thread.\n", 0); +#endif +} + + +/*! This starts the scheduler. Must be run under the context of + the initial idle thread. +*/ +void +scheduler_start(void) +{ + cpu_status state = disable_interrupts(); + GRAB_THREAD_LOCK(); + + scheduler_reschedule(); + + RELEASE_THREAD_LOCK(); + restore_interrupts(state); +} diff --git a/src/system/kernel/scheduler/scheduler_tracing.cpp b/src/system/kernel/scheduler/scheduler_tracing.cpp new file mode 100644 index 0000000000..c7c9e801ff --- /dev/null +++ b/src/system/kernel/scheduler/scheduler_tracing.cpp @@ -0,0 +1,304 @@ +/* + * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. + * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. + * Distributed under the terms of the MIT License. 
+ */
+
+#include "scheduler_tracing.h"
+
+#include
+
+
+#if SCHEDULER_TRACING
+
+namespace SchedulerTracing {
+
+// #pragma mark - EnqueueThread
+
+
+void
+EnqueueThread::AddDump(TraceOutput& out)
+{
+	out.Print("scheduler enqueue %ld \"%s\", priority %d (previous %ld, "
+		"next %ld)", fID, fName, fPriority, fPreviousID, fNextID);
+}
+
+
+const char*
+EnqueueThread::Name() const
+{
+	return fName;
+}
+
+
+// #pragma mark - RemoveThread
+
+
+void
+RemoveThread::AddDump(TraceOutput& out)
+{
+	out.Print("scheduler remove %ld, priority %d", fID, fPriority);
+}
+
+const char*
+RemoveThread::Name() const
+{
+	return NULL;
+}
+
+
+// #pragma mark - ScheduleThread
+
+
+void
+ScheduleThread::AddDump(TraceOutput& out)
+{
+	out.Print("schedule %ld \"%s\", priority %d, CPU %ld, "
+		"previous thread: %ld (", fID, fName, fPriority, fCPU, fPreviousID);
+	if (fPreviousState == B_THREAD_WAITING) {
+		switch (fPreviousWaitObjectType) {
+			case THREAD_BLOCK_TYPE_SEMAPHORE:
+				out.Print("sem %ld", (sem_id)(addr_t)fPreviousWaitObject);
+				break;
+			case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
+				out.Print("cvar %p", fPreviousWaitObject);
+				break;
+			case THREAD_BLOCK_TYPE_SNOOZE:
+				out.Print("snooze()");
+				break;
+			case THREAD_BLOCK_TYPE_SIGNAL:
+				out.Print("signal");
+				break;
+			case THREAD_BLOCK_TYPE_MUTEX:
+				out.Print("mutex %p", fPreviousWaitObject);
+				break;
+			case THREAD_BLOCK_TYPE_RW_LOCK:
+				out.Print("rwlock %p", fPreviousWaitObject);
+				break;
+			case THREAD_BLOCK_TYPE_OTHER:
+				out.Print("other (%p)", fPreviousWaitObject);
+				// We could print the string, but it might come from a
+				// kernel module that has already been unloaded.
+				break;
+			default:
+				out.Print("unknown (%p)", fPreviousWaitObject);
+				break;
+		}
+#if SCHEDULER_TRACING >= 2
+	} else if (fPreviousState == B_THREAD_READY) {
+		out.Print("ready at %p", fPreviousPC);
+#endif
+	} else
+		out.Print("%s", thread_state_to_text(NULL, fPreviousState));
+
+	out.Print(")");
+}
+
+
+const char*
+ScheduleThread::Name() const
+{
+	return fName;
+}
+
+} // namespace SchedulerTracing
+
+
+// #pragma mark -
+
+
+int
+cmd_scheduler(int argc, char** argv)
+{
+	using namespace SchedulerTracing;
+
+	int64 threadID;
+	if (argc != 2
+		|| !evaluate_debug_expression(argv[1], (uint64*)&threadID, true)) {
+		print_debugger_command_usage(argv[0]);
+		return 0;
+	}
+
+	if (threadID <= 0) {
+		kprintf("Invalid thread ID: %lld\n", threadID);
+		return 0;
+	}
+
+	ScheduleState state = UNKNOWN;
+	bigtime_t lastTime = 0;
+
+	int64 runs = 0;
+	bigtime_t totalRunTime = 0;
+	bigtime_t minRunTime = -1;
+	bigtime_t maxRunTime = -1;
+
+	int64 latencies = 0;
+	bigtime_t totalLatency = 0;
+	bigtime_t minLatency = -1;
+	bigtime_t maxLatency = -1;
+	int32 maxLatencyEntry = -1;
+
+	int64 reruns = 0;
+	bigtime_t totalRerunTime = 0;
+	bigtime_t minRerunTime = -1;
+	bigtime_t maxRerunTime = -1;
+	int32 maxRerunEntry = -1;
+
+	int64 preemptions = 0;
+
+	TraceEntryIterator iterator;
+	while (TraceEntry* _entry = iterator.Next()) {
+		if (dynamic_cast<SchedulerTraceEntry*>(_entry) == NULL)
+			continue;
+
+		if (ScheduleThread* entry = dynamic_cast<ScheduleThread*>(_entry)) {
+			if (entry->ThreadID() == threadID) {
+				// thread scheduled
+				bigtime_t diffTime = entry->Time() - lastTime;
+
+				if (state == READY) {
+					// thread scheduled after having been woken up
+					latencies++;
+					totalLatency += diffTime;
+					if (minLatency < 0 || diffTime < minLatency)
+						minLatency = diffTime;
+					if (diffTime > maxLatency) {
+						maxLatency = diffTime;
+						maxLatencyEntry = iterator.Index();
+					}
+				} else if (state == PREEMPTED) {
+					// thread scheduled after having been preempted before
+					reruns++;
+					totalRerunTime += diffTime;
+					if (minRerunTime < 0 || diffTime < minRerunTime)
+						minRerunTime = diffTime;
+					if (diffTime > maxRerunTime) {
+						maxRerunTime = diffTime;
+						maxRerunEntry = iterator.Index();
+					}
+				}
+
+				if (state == STILL_RUNNING) {
+					// Thread was running and continues to run.
+					state = RUNNING;
+				}
+
+				if (state != RUNNING) {
+					lastTime = entry->Time();
+					state = RUNNING;
+				}
+			} else if (entry->PreviousThreadID() == threadID) {
+				// thread unscheduled
+				bigtime_t diffTime = entry->Time() - lastTime;
+
+				if (state == STILL_RUNNING) {
+					// thread preempted
+					state = PREEMPTED;
+
+					runs++;
+					preemptions++;
+					totalRunTime += diffTime;
+					if (minRunTime < 0 || diffTime < minRunTime)
+						minRunTime = diffTime;
+					if (diffTime > maxRunTime)
+						maxRunTime = diffTime;
+				} else if (state == RUNNING) {
+					// thread starts waiting (it hadn't been added to the run
+					// queue before being unscheduled)
+					bigtime_t diffTime = entry->Time() - lastTime;
+					runs++;
+					totalRunTime += diffTime;
+					if (minRunTime < 0 || diffTime < minRunTime)
+						minRunTime = diffTime;
+					if (diffTime > maxRunTime)
+						maxRunTime = diffTime;
+
+					state = WAITING;
+				}
+			}
+		} else if (EnqueueThread* entry
+				= dynamic_cast<EnqueueThread*>(_entry)) {
+			if (entry->ThreadID() != threadID)
+				continue;
+
+			// thread enqueued in run queue
+
+			if (state == RUNNING || state == STILL_RUNNING) {
+				// Thread was running and is reentered into the run queue. This
+				// is done by the scheduler, if the thread remains ready.
+				state = STILL_RUNNING;
+			} else {
+				// Thread was waiting and is ready now.
+				lastTime = entry->Time();
+				state = READY;
+			}
+		} else if (RemoveThread* entry = dynamic_cast<RemoveThread*>(_entry)) {
+			if (entry->ThreadID() != threadID)
+				continue;
+
+			// thread removed from run queue
+
+			// This really only happens when the thread priority is changed
+			// while the thread is ready.
+
+			if (state == RUNNING) {
+				// This should never happen.
+				bigtime_t diffTime = entry->Time() - lastTime;
+				runs++;
+				totalRunTime += diffTime;
+				if (minRunTime < 0 || diffTime < minRunTime)
+					minRunTime = diffTime;
+				if (diffTime > maxRunTime)
+					maxRunTime = diffTime;
+			}
+
+			state = WAITING;
+		}
+	}
+
+	// print results
+	if (runs == 0) {
+		kprintf("thread %lld never ran.\n", threadID);
+		return 0;
+	}
+
+	kprintf("scheduling statistics for thread %lld:\n", threadID);
+	kprintf("runs:\n");
+	kprintf(" total #: %lld\n", runs);
+	kprintf(" total: %lld us\n", totalRunTime);
+	kprintf(" average: %#.2f us\n", (double)totalRunTime / runs);
+	kprintf(" min: %lld us\n", minRunTime);
+	kprintf(" max: %lld us\n", maxRunTime);
+
+	if (latencies > 0) {
+		kprintf("scheduling latency after wake up:\n");
+		kprintf(" total #: %lld\n", latencies);
+		kprintf(" total: %lld us\n", totalLatency);
+		kprintf(" average: %#.2f us\n", (double)totalLatency / latencies);
+		kprintf(" min: %lld us\n", minLatency);
+		kprintf(" max: %lld us\n", maxLatency);
+		kprintf(" max: %lld us (at tracing entry %ld)\n", maxLatency,
+			maxLatencyEntry);
+	} else
+		kprintf("thread was never run after having been woken up\n");
+
+	if (reruns > 0) {
+		kprintf("scheduling latency after preemption:\n");
+		kprintf(" total #: %lld\n", reruns);
+		kprintf(" total: %lld us\n", totalRerunTime);
+		kprintf(" average: %#.2f us\n", (double)totalRerunTime / reruns);
+		kprintf(" min: %lld us\n", minRerunTime);
+		kprintf(" max: %lld us (at tracing entry %ld)\n", maxRerunTime,
+			maxRerunEntry);
+	} else
+		kprintf("thread was never rerun after preemption\n");
+
+	if (preemptions > 0)
+		kprintf("thread was preempted %lld times\n", preemptions);
+	else
+		kprintf("thread was never preempted\n");
+
+	return 0;
+}
+
+#endif // SCHEDULER_TRACING
diff --git a/src/system/kernel/scheduler/scheduler_tracing.h b/src/system/kernel/scheduler/scheduler_tracing.h
new file mode 100644
index 0000000000..2933558c34
--- /dev/null
+++ b/src/system/kernel/scheduler/scheduler_tracing.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de.
+ * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
+ * Distributed under the terms of the MIT License.
+ */ +#ifndef KERNEL_SCHEDULER_TRACING_H +#define KERNEL_SCHEDULER_TRACING_H + +#include +#include +#include +#include + + +#if SCHEDULER_TRACING + +namespace SchedulerTracing { + +class SchedulerTraceEntry : public AbstractTraceEntry { +public: + SchedulerTraceEntry(struct thread* thread) + : + fID(thread->id) + { + } + + thread_id ThreadID() const { return fID; } + + virtual const char* Name() const = 0; + +protected: + thread_id fID; +}; + + +class EnqueueThread : public SchedulerTraceEntry { +public: + EnqueueThread(struct thread* thread, struct thread* previous, + struct thread* next) + : + SchedulerTraceEntry(thread), + fPreviousID(-1), + fNextID(-1), + fPriority(thread->priority) + { + if (previous != NULL) + fPreviousID = previous->id; + if (next != NULL) + fNextID = next->id; + fName = alloc_tracing_buffer_strcpy(thread->name, B_OS_NAME_LENGTH, + false); + Initialized(); + } + + virtual void AddDump(TraceOutput& out); + + virtual const char* Name() const; + +private: + thread_id fPreviousID; + thread_id fNextID; + char* fName; + uint8 fPriority; +}; + + +class RemoveThread : public SchedulerTraceEntry { +public: + RemoveThread(struct thread* thread) + : + SchedulerTraceEntry(thread), + fPriority(thread->priority) + { + Initialized(); + } + + virtual void AddDump(TraceOutput& out); + + virtual const char* Name() const; + +private: + uint8 fPriority; +}; + + +class ScheduleThread : public SchedulerTraceEntry { +public: + ScheduleThread(struct thread* thread, struct thread* previous) + : + SchedulerTraceEntry(thread), + fPreviousID(previous->id), + fCPU(previous->cpu->cpu_num), + fPriority(thread->priority), + fPreviousState(previous->state), + fPreviousWaitObjectType(previous->wait.type) + { + fName = alloc_tracing_buffer_strcpy(thread->name, B_OS_NAME_LENGTH, + false); + +#if SCHEDULER_TRACING >= 2 + if (fPreviousState == B_THREAD_READY) + fPreviousPC = arch_debug_get_interrupt_pc(); + else +#endif + fPreviousWaitObject = previous->wait.object; + + Initialized(); + } + + virtual void AddDump(TraceOutput& out); + + virtual const char* Name() const; + + thread_id PreviousThreadID() const { return fPreviousID; } + uint8 PreviousState() const { return fPreviousState; } + uint16 PreviousWaitObjectType() const { return fPreviousWaitObjectType; } + const void* PreviousWaitObject() const { return fPreviousWaitObject; } + +private: + thread_id fPreviousID; + int32 fCPU; + char* fName; + uint8 fPriority; + uint8 fPreviousState; + uint16 fPreviousWaitObjectType; + union { + const void* fPreviousWaitObject; + void* fPreviousPC; + }; +}; + +} // namespace SchedulerTracing + +# define T(x) new(std::nothrow) SchedulerTracing::x; +#else +# define T(x) ; +#endif + + +#if SCHEDULER_TRACING + +namespace SchedulerTracing { + +enum ScheduleState { + RUNNING, + STILL_RUNNING, + PREEMPTED, + READY, + WAITING, + UNKNOWN +}; + +} + +int cmd_scheduler(int argc, char** argv); + +#endif // SCHEDULER_TRACING + +#endif // KERNEL_SCHEDULER_TRACING_H diff --git a/src/system/kernel/scheduler.cpp b/src/system/kernel/scheduler/scheduling_analysis.cpp similarity index 51% rename from src/system/kernel/scheduler.cpp rename to src/system/kernel/scheduler/scheduling_analysis.cpp index 8788256449..9f6868edf4 100644 --- a/src/system/kernel/scheduler.cpp +++ b/src/system/kernel/scheduler/scheduling_analysis.cpp @@ -1,798 +1,18 @@ /* * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. - * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. - * Copyright 2002, Angelo Mottola, a.mottola@libero.it. 
* Distributed under the terms of the MIT License. - * - * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. - * Distributed under the terms of the NewOS License. */ -/*! The thread scheduler */ - - -#include - -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include + +#include +#include +#include #include -#include #include #include - -//#define TRACE_SCHEDULER -#ifdef TRACE_SCHEDULER -# define TRACE(x) dprintf x -#else -# define TRACE(x) ; -#endif - - -#if SCHEDULER_TRACING -namespace SchedulerTracing { - -class SchedulerTraceEntry : public AbstractTraceEntry { -public: - SchedulerTraceEntry(struct thread* thread) - : - fID(thread->id) - { - } - - thread_id ThreadID() const { return fID; } - - virtual const char* Name() const = 0; - -protected: - thread_id fID; -}; - - -class EnqueueThread : public SchedulerTraceEntry { -public: - EnqueueThread(struct thread* thread, struct thread* previous, - struct thread* next) - : - SchedulerTraceEntry(thread), - fPreviousID(-1), - fNextID(-1), - fPriority(thread->priority) - { - if (previous != NULL) - fPreviousID = previous->id; - if (next != NULL) - fNextID = next->id; - fName = alloc_tracing_buffer_strcpy(thread->name, B_OS_NAME_LENGTH, - false); - Initialized(); - } - - virtual void AddDump(TraceOutput& out) - { - out.Print("scheduler enqueue %ld \"%s\", priority %d (previous %ld, " - "next %ld)", fID, fName, fPriority, fPreviousID, fNextID); - } - - virtual const char* Name() const - { - return fName; - } - -private: - thread_id fPreviousID; - thread_id fNextID; - char* fName; - uint8 fPriority; -}; - - -class RemoveThread : public SchedulerTraceEntry { -public: - RemoveThread(struct thread* thread) - : - SchedulerTraceEntry(thread), - fPriority(thread->priority) - { - Initialized(); - } - - virtual void AddDump(TraceOutput& out) - { - out.Print("scheduler remove %ld, priority %d", fID, fPriority); - } - - virtual const char* Name() const - { - return NULL; - } - -private: - uint8 fPriority; -}; - - -class ScheduleThread : public SchedulerTraceEntry { -public: - ScheduleThread(struct thread* thread, struct thread* previous) - : - SchedulerTraceEntry(thread), - fPreviousID(previous->id), - fCPU(previous->cpu->cpu_num), - fPriority(thread->priority), - fPreviousState(previous->state), - fPreviousWaitObjectType(previous->wait.type) - { - fName = alloc_tracing_buffer_strcpy(thread->name, B_OS_NAME_LENGTH, - false); - -#if SCHEDULER_TRACING >= 2 - if (fPreviousState == B_THREAD_READY) - fPreviousPC = arch_debug_get_interrupt_pc(); - else -#endif - fPreviousWaitObject = previous->wait.object; - - Initialized(); - } - - virtual void AddDump(TraceOutput& out) - { - out.Print("schedule %ld \"%s\", priority %d, CPU %ld, " - "previous thread: %ld (", fID, fName, fPriority, fCPU, fPreviousID); - if (fPreviousState == B_THREAD_WAITING) { - switch (fPreviousWaitObjectType) { - case THREAD_BLOCK_TYPE_SEMAPHORE: - out.Print("sem %ld", (sem_id)(addr_t)fPreviousWaitObject); - break; - case THREAD_BLOCK_TYPE_CONDITION_VARIABLE: - out.Print("cvar %p", fPreviousWaitObject); - break; - case THREAD_BLOCK_TYPE_SNOOZE: - out.Print("snooze()"); - break; - case THREAD_BLOCK_TYPE_SIGNAL: - out.Print("signal"); - break; - case THREAD_BLOCK_TYPE_MUTEX: - out.Print("mutex %p", fPreviousWaitObject); - break; - case THREAD_BLOCK_TYPE_RW_LOCK: - out.Print("rwlock %p", fPreviousWaitObject); - break; - case THREAD_BLOCK_TYPE_OTHER: - out.Print("other (%p)", fPreviousWaitObject); - // We could print the 
string, but it might come from a - // kernel module that has already been unloaded. - break; - default: - out.Print("unknown (%p)", fPreviousWaitObject); - break; - } -#if SCHEDULER_TRACING >= 2 - } else if (fPreviousState == B_THREAD_READY) { - out.Print("ready at %p", fPreviousPC); -#endif - } else - out.Print("%s", thread_state_to_text(NULL, fPreviousState)); - - out.Print(")"); - } - - virtual const char* Name() const - { - return fName; - } - - thread_id PreviousThreadID() const { return fPreviousID; } - uint8 PreviousState() const { return fPreviousState; } - uint16 PreviousWaitObjectType() const { return fPreviousWaitObjectType; } - const void* PreviousWaitObject() const { return fPreviousWaitObject; } - -private: - thread_id fPreviousID; - int32 fCPU; - char* fName; - uint8 fPriority; - uint8 fPreviousState; - uint16 fPreviousWaitObjectType; - union { - const void* fPreviousWaitObject; - void* fPreviousPC; - }; -}; - -} // namespace SchedulerTracing - -# define T(x) new(std::nothrow) SchedulerTracing::x; -#else -# define T(x) ; -#endif - - -// The run queue. Holds the threads ready to run ordered by priority. -static struct thread *sRunQueue = NULL; -static cpu_mask_t sIdleCPUs = 0; - - -static int -_rand(void) -{ - static int next = 0; - - if (next == 0) - next = system_time(); - - next = next * 1103515245 + 12345; - return (next >> 16) & 0x7FFF; -} - - -static int -dump_run_queue(int argc, char **argv) -{ - struct thread *thread; - - thread = sRunQueue; - if (!thread) - kprintf("Run queue is empty!\n"); - else { - kprintf("thread id priority name\n"); - while (thread) { - kprintf("%p %-7ld %-8ld %s\n", thread, thread->id, - thread->priority, thread->name); - thread = thread->queue_next; - } - } - - return 0; -} - - -#if SCHEDULER_TRACING - -namespace SchedulerTracing { - -enum ScheduleState { - RUNNING, - STILL_RUNNING, - PREEMPTED, - READY, - WAITING, - UNKNOWN -}; - -} - - -static int -cmd_scheduler(int argc, char** argv) -{ - using namespace SchedulerTracing; - - int64 threadID; - if (argc != 2 - || !evaluate_debug_expression(argv[1], (uint64*)&threadID, true)) { - print_debugger_command_usage(argv[0]); - return 0; - } - - if (threadID <= 0) { - kprintf("Invalid thread ID: %lld\n", threadID); - return 0; - } - - ScheduleState state = UNKNOWN; - bigtime_t lastTime = 0; - - int64 runs = 0; - bigtime_t totalRunTime = 0; - bigtime_t minRunTime = -1; - bigtime_t maxRunTime = -1; - - int64 latencies = 0; - bigtime_t totalLatency = 0; - bigtime_t minLatency = -1; - bigtime_t maxLatency = -1; - int32 maxLatencyEntry = -1; - - int64 reruns = 0; - bigtime_t totalRerunTime = 0; - bigtime_t minRerunTime = -1; - bigtime_t maxRerunTime = -1; - int32 maxRerunEntry = -1; - - int64 preemptions = 0; - - TraceEntryIterator iterator; - while (TraceEntry* _entry = iterator.Next()) { - if (dynamic_cast(_entry) == NULL) - continue; - - if (ScheduleThread* entry = dynamic_cast(_entry)) { - if (entry->ThreadID() == threadID) { - // thread scheduled - bigtime_t diffTime = entry->Time() - lastTime; - - if (state == READY) { - // thread scheduled after having been woken up - latencies++; - totalLatency += diffTime; - if (minLatency < 0 || diffTime < minLatency) - minLatency = diffTime; - if (diffTime > maxLatency) { - maxLatency = diffTime; - maxLatencyEntry = iterator.Index(); - } - } else if (state == PREEMPTED) { - // thread scheduled after having been preempted before - reruns++; - totalRerunTime += diffTime; - if (minRerunTime < 0 || diffTime < minRerunTime) - minRerunTime = diffTime; - if (diffTime 
> maxRerunTime) { - maxRerunTime = diffTime; - maxRerunEntry = iterator.Index(); - } - } - - if (state == STILL_RUNNING) { - // Thread was running and continues to run. - state = RUNNING; - } - - if (state != RUNNING) { - lastTime = entry->Time(); - state = RUNNING; - } - } else if (entry->PreviousThreadID() == threadID) { - // thread unscheduled - bigtime_t diffTime = entry->Time() - lastTime; - - if (state == STILL_RUNNING) { - // thread preempted - state = PREEMPTED; - - runs++; - preemptions++; - totalRunTime += diffTime; - if (minRunTime < 0 || diffTime < minRunTime) - minRunTime = diffTime; - if (diffTime > maxRunTime) - maxRunTime = diffTime; - } else if (state == RUNNING) { - // thread starts waiting (it hadn't been added to the run - // queue before being unscheduled) - bigtime_t diffTime = entry->Time() - lastTime; - runs++; - totalRunTime += diffTime; - if (minRunTime < 0 || diffTime < minRunTime) - minRunTime = diffTime; - if (diffTime > maxRunTime) - maxRunTime = diffTime; - - state = WAITING; - } - } - } else if (EnqueueThread* entry - = dynamic_cast(_entry)) { - if (entry->ThreadID() != threadID) - continue; - - // thread enqueued in run queue - - if (state == RUNNING || state == STILL_RUNNING) { - // Thread was running and is reentered into the run queue. This - // is done by the scheduler, if the thread remains ready. - state = STILL_RUNNING; - } else { - // Thread was waiting and is ready now. - lastTime = entry->Time(); - state = READY; - } - } else if (RemoveThread* entry = dynamic_cast(_entry)) { - if (entry->ThreadID() != threadID) - continue; - - // thread removed from run queue - - // This really only happens when the thread priority is changed - // while the thread is ready. - - if (state == RUNNING) { - // This should never happen. - bigtime_t diffTime = entry->Time() - lastTime; - runs++; - totalRunTime += diffTime; - if (minRunTime < 0 || diffTime < minRunTime) - minRunTime = diffTime; - if (diffTime > maxRunTime) - maxRunTime = diffTime; - } - - state = WAITING; - } - } - - // print results - if (runs == 0) { - kprintf("thread %lld never ran.\n", threadID); - return 0; - } - - kprintf("scheduling statistics for thread %lld:\n", threadID); - kprintf("runs:\n"); - kprintf(" total #: %lld\n", runs); - kprintf(" total: %lld us\n", totalRunTime); - kprintf(" average: %#.2f us\n", (double)totalRunTime / runs); - kprintf(" min: %lld us\n", minRunTime); - kprintf(" max: %lld us\n", maxRunTime); - - if (latencies > 0) { - kprintf("scheduling latency after wake up:\n"); - kprintf(" total #: %lld\n", latencies); - kprintf(" total: %lld us\n", totalLatency); - kprintf(" average: %#.2f us\n", (double)totalLatency / latencies); - kprintf(" min: %lld us\n", minLatency); - kprintf(" max: %lld us\n", maxLatency); - kprintf(" max: %lld us (at tracing entry %ld)\n", maxLatency, - maxLatencyEntry); - } else - kprintf("thread was never run after having been woken up\n"); - - if (reruns > 0) { - kprintf("scheduling latency after preemption:\n"); - kprintf(" total #: %lld\n", reruns); - kprintf(" total: %lld us\n", totalRerunTime); - kprintf(" average: %#.2f us\n", (double)totalRerunTime / reruns); - kprintf(" min: %lld us\n", minRerunTime); - kprintf(" max: %lld us (at tracing entry %ld)\n", maxRerunTime, - maxRerunEntry); - } else - kprintf("thread was never rerun after preemption\n"); - - if (preemptions > 0) - kprintf("thread was preempted %lld times\n", preemptions); - else - kprintf("thread was never preempted\n"); - - return 0; -} - -#endif // SCHEDULER_TRACING - - -/*! 
Enqueues the thread into the run queue. - Note: thread lock must be held when entering this function -*/ -void -scheduler_enqueue_in_run_queue(struct thread *thread) -{ - if (thread->state == B_THREAD_RUNNING) { - // The thread is currently running (on another CPU) and we cannot - // insert it into the run queue. Set the next state to ready so the - // thread is inserted into the run queue on the next reschedule. - thread->next_state = B_THREAD_READY; - return; - } - - thread->state = thread->next_state = B_THREAD_READY; - - struct thread *curr, *prev; - for (curr = sRunQueue, prev = NULL; curr - && curr->priority >= thread->next_priority; - curr = curr->queue_next) { - if (prev) - prev = prev->queue_next; - else - prev = sRunQueue; - } - - T(EnqueueThread(thread, prev, curr)); - - thread->queue_next = curr; - if (prev) - prev->queue_next = thread; - else - sRunQueue = thread; - - thread->next_priority = thread->priority; - - if (thread->priority != B_IDLE_PRIORITY) { - int32 currentCPU = smp_get_current_cpu(); - if (sIdleCPUs != 0) { - if (thread->pinned_to_cpu > 0) { - // thread is pinned to a CPU -- notify it, if it is idle - int32 targetCPU = thread->previous_cpu->cpu_num; - if ((sIdleCPUs & (1 << targetCPU)) != 0) { - sIdleCPUs &= ~(1 << targetCPU); - smp_send_ici(targetCPU, SMP_MSG_RESCHEDULE_IF_IDLE, 0, 0, - 0, NULL, SMP_MSG_FLAG_ASYNC); - } - } else { - // Thread is not pinned to any CPU -- take it ourselves, if we - // are idle, otherwise notify the next idle CPU. In either case - // we clear the idle bit of the chosen CPU, so that the - // scheduler_enqueue_in_run_queue() won't try to bother the - // same CPU again, if invoked before it handled the interrupt. - cpu_mask_t idleCPUs = CLEAR_BIT(sIdleCPUs, currentCPU); - if ((sIdleCPUs & (1 << currentCPU)) != 0) { - sIdleCPUs = idleCPUs; - } else { - int32 targetCPU = 0; - for (; targetCPU < B_MAX_CPU_COUNT; targetCPU++) { - cpu_mask_t mask = 1 << targetCPU; - if ((idleCPUs & mask) != 0) { - sIdleCPUs &= ~mask; - break; - } - } - - smp_send_ici(targetCPU, SMP_MSG_RESCHEDULE_IF_IDLE, 0, 0, - 0, NULL, SMP_MSG_FLAG_ASYNC); - } - } - } - } -} - - -/*! Removes a thread from the run queue. - Note: thread lock must be held when entering this function -*/ -void -scheduler_remove_from_run_queue(struct thread *thread) -{ - struct thread *item, *prev; - - T(RemoveThread(thread)); - - // find thread in run queue - for (item = sRunQueue, prev = NULL; item && item != thread; - item = item->queue_next) { - if (prev) - prev = prev->queue_next; - else - prev = sRunQueue; - } - - ASSERT(item == thread); - - if (prev) - prev->queue_next = item->queue_next; - else - sRunQueue = item->queue_next; -} - - -static void -context_switch(struct thread *fromThread, struct thread *toThread) -{ - if ((fromThread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0) - user_debug_thread_unscheduled(fromThread); - - toThread->previous_cpu = toThread->cpu = fromThread->cpu; - fromThread->cpu = NULL; - - arch_thread_set_current_thread(toThread); - arch_thread_context_switch(fromThread, toThread); - - // Looks weird, but is correct. fromThread had been unscheduled earlier, - // but is back now. The notification for a thread scheduled the first time - // happens in thread.cpp:thread_kthread_entry(). 
- if ((fromThread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0) - user_debug_thread_scheduled(fromThread); -} - - -static int32 -reschedule_event(timer *unused) -{ - if (thread_get_current_thread()->keep_scheduled > 0) - return B_HANDLED_INTERRUPT; - - // this function is called as a result of the timer event set by the - // scheduler returning this causes a reschedule on the timer event - thread_get_current_thread()->cpu->preempted = 1; - return B_INVOKE_SCHEDULER; -} - - -/*! Runs the scheduler. - Note: expects thread spinlock to be held -*/ -void -scheduler_reschedule(void) -{ - struct thread *oldThread = thread_get_current_thread(); - struct thread *nextThread, *prevThread; - - TRACE(("reschedule(): cpu %d, cur_thread = %ld\n", smp_get_current_cpu(), thread_get_current_thread()->id)); - - oldThread->cpu->invoke_scheduler = false; - - oldThread->state = oldThread->next_state; - switch (oldThread->next_state) { - case B_THREAD_RUNNING: - case B_THREAD_READY: - TRACE(("enqueueing thread %ld into run q. pri = %ld\n", oldThread->id, oldThread->priority)); - scheduler_enqueue_in_run_queue(oldThread); - break; - case B_THREAD_SUSPENDED: - TRACE(("reschedule(): suspending thread %ld\n", oldThread->id)); - break; - case THREAD_STATE_FREE_ON_RESCHED: - break; - default: - TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n", oldThread->id, oldThread->next_state)); - break; - } - - nextThread = sRunQueue; - prevThread = NULL; - - if (oldThread->cpu->disabled) { - // CPU is disabled - just select an idle thread - while (nextThread && nextThread->priority > B_IDLE_PRIORITY) { - prevThread = nextThread; - nextThread = nextThread->queue_next; - } - } else { - while (nextThread) { - // select next thread from the run queue - while (nextThread && nextThread->priority > B_IDLE_PRIORITY) { -#if 0 - if (oldThread == nextThread && nextThread->was_yielded) { - // ignore threads that called thread_yield() once - nextThread->was_yielded = false; - prevThread = nextThread; - nextThread = nextThread->queue_next; - } -#endif - - // skip thread, if it doesn't want to run on this CPU - if (nextThread->pinned_to_cpu > 0 - && nextThread->previous_cpu != oldThread->cpu) { - prevThread = nextThread; - nextThread = nextThread->queue_next; - continue; - } - - // always extract real time threads - if (nextThread->priority >= B_FIRST_REAL_TIME_PRIORITY) - break; - - // never skip last non-idle normal thread - if (nextThread->queue_next && nextThread->queue_next->priority == B_IDLE_PRIORITY) - break; - - // skip normal threads sometimes (roughly 20%) - if (_rand() > 0x1a00) - break; - - // skip until next lower priority - int32 priority = nextThread->priority; - do { - prevThread = nextThread; - nextThread = nextThread->queue_next; - } while (nextThread->queue_next != NULL - && priority == nextThread->queue_next->priority - && nextThread->queue_next->priority > B_IDLE_PRIORITY); - } - - if (nextThread->cpu - && nextThread->cpu->cpu_num != oldThread->cpu->cpu_num) { - panic("thread in run queue that's still running on another CPU!\n"); - // ToDo: remove this check completely when we're sure that this - // cannot happen anymore. 
- prevThread = nextThread; - nextThread = nextThread->queue_next; - continue; - } - - break; - } - } - - if (!nextThread) - panic("reschedule(): run queue is empty!\n"); - - // extract selected thread from the run queue - if (prevThread) - prevThread->queue_next = nextThread->queue_next; - else - sRunQueue = nextThread->queue_next; - - T(ScheduleThread(nextThread, oldThread)); - - nextThread->state = B_THREAD_RUNNING; - nextThread->next_state = B_THREAD_READY; - oldThread->was_yielded = false; - - // track kernel time (user time is tracked in thread_at_kernel_entry()) - bigtime_t now = system_time(); - oldThread->kernel_time += now - oldThread->last_time; - nextThread->last_time = now; - - // track CPU activity - if (!thread_is_idle_thread(oldThread)) { - oldThread->cpu->active_time += - (oldThread->kernel_time - oldThread->cpu->last_kernel_time) - + (oldThread->user_time - oldThread->cpu->last_user_time); - } - - if (!thread_is_idle_thread(nextThread)) { - oldThread->cpu->last_kernel_time = nextThread->kernel_time; - oldThread->cpu->last_user_time = nextThread->user_time; - } - - if (nextThread != oldThread || oldThread->cpu->preempted) { - bigtime_t quantum = 3000; // ToDo: calculate quantum! - timer *quantumTimer = &oldThread->cpu->quantum_timer; - - if (!oldThread->cpu->preempted) - cancel_timer(quantumTimer); - - oldThread->cpu->preempted = 0; - add_timer(quantumTimer, &reschedule_event, quantum, - B_ONE_SHOT_RELATIVE_TIMER | B_TIMER_ACQUIRE_THREAD_LOCK); - - // update the idle bit for this CPU in the CPU mask - int32 cpuNum = smp_get_current_cpu(); - if (nextThread->priority == B_IDLE_PRIORITY) - sIdleCPUs = SET_BIT(sIdleCPUs, cpuNum); - else - sIdleCPUs = CLEAR_BIT(sIdleCPUs, cpuNum); - - if (nextThread != oldThread) - context_switch(oldThread, nextThread); - } -} - - -void -scheduler_init(void) -{ - add_debugger_command_etc("run_queue", &dump_run_queue, - "List threads in run queue", "\nLists threads in run queue", 0); - -#if SCHEDULER_TRACING - add_debugger_command_etc("scheduler", &cmd_scheduler, - "Analyze scheduler tracing information", - "\n" - "Analyzes scheduler tracing information for a given thread.\n" - " - ID of the thread.\n", 0); -#endif -} - - -/*! This starts the scheduler. Must be run under the context of - the initial idle thread. -*/ -void -scheduler_start(void) -{ - cpu_status state = disable_interrupts(); - GRAB_THREAD_LOCK(); - - scheduler_reschedule(); - - RELEASE_THREAD_LOCK(); - restore_interrupts(state); -} - - -// #pragma mark - +#include "scheduler_tracing.h" #if SCHEDULER_TRACING