Implemented lazy FPU state save/restore. In the end mostly ported from NewOS. SMP safe.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@17251 a95241bf-73f2-0310-859d-f6bbb57e9c96
Michael Lotz 2006-04-27 22:02:48 +00:00
parent fdfbd66456
commit 7eee76e65a
9 changed files with 56 additions and 35 deletions
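The mechanism, in brief: instead of eagerly copying FPU register contents on every context switch, the switch path now only sets CR0.TS; the first FPU instruction a thread executes afterwards raises #NM (vector 7), and the fault handler migrates the state on demand. Below is a condensed sketch of that flow as implemented by the hunks that follow — the identifiers come from the diffs, but the two wrapper function names are hypothetical and the control flow is simplified:

	// On a context switch, don't touch the FPU at all -- just mark it:
	static void
	fpu_mark_lazy(void)
	{
		x86_write_cr0(x86_read_cr0() | CR0_TASK_SWITCHED);
	}

	// The next FPU instruction raises #NM (vector 7); the handler then
	// migrates the per-CPU register contents on demand:
	static void
	fpu_handle_device_not_available(cpu_ent *cpu, struct thread *thread)
	{
		asm volatile ("clts");	// clear CR0.TS; FPU instructions work again

		if (cpu->info.fpu_state_thread != thread) {
			if (cpu->info.fpu_state_thread != NULL) {
				// evict the previous owner's registers into its save area
				gX86SaveFPUFunc(cpu->info.fpu_state_thread->arch_info.fpu_state);
				cpu->info.fpu_state_thread->fpu_cpu = NULL;
			}

			gX86RestoreFPUFunc(thread->arch_info.fpu_state);
			cpu->info.fpu_state_thread = thread;
			thread->fpu_cpu = cpu;
		}
	}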

View File

@@ -104,12 +104,10 @@ void i386_enter_uspace(addr_t entry, void *args1, void *args2, addr_t ustack_top
 void i386_set_tss_and_kstack(addr_t kstack);
 void i386_switch_stack_and_call(addr_t stack, void (*func)(void *), void *arg);
 void i386_swap_pgdir(addr_t new_pgdir);
-void i386_fsave(void *fpu_state);
+void i386_fnsave(void *fpu_state);
 void i386_fxsave(void *fpu_state);
 void i386_frstor(const void *fpu_state);
 void i386_fxrstor(const void *fpu_state);
-void i386_fsave_swap(void *old_fpu_state, const void *new_fpu_state);
-void i386_fxsave_swap(void *old_fpu_state, const void *new_fpu_state);
 uint32 x86_read_ebp();
 uint32 x86_read_cr0();
 void x86_write_cr0(uint32 value);

View File

@@ -10,6 +10,7 @@
 #include <smp.h>
+#include <thread.h>
 #include <timer.h>
 #include <boot/kernel_args.h>
@@ -24,6 +25,9 @@ typedef union cpu_ent {
 		int preempted;
 		timer quantum_timer;
 
+		// tells which thread's fpu state we hold
+		struct thread *fpu_state_thread;
+
 		// keeping track of CPU activity
 		bigtime_t active_time;
 		bigtime_t last_kernel_time;

View File

@@ -139,6 +139,7 @@ struct thread {
 	int32 state;
 	int32 next_state;
 	union cpu_ent *cpu;
+	union cpu_ent *fpu_cpu;	/* this cpu holds our fpu state */
 	sigset_t sig_pending;
 	sigset_t sig_block_mask;
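The two new fields are back-references to each other: cpu_ent.info.fpu_state_thread names the thread whose registers a CPU currently holds, and thread.fpu_cpu names the CPU holding them. A hypothetical debug helper (not part of the commit, and assuming the kernel's ASSERT macro) that states the invariant:

	static void
	assert_fpu_links_consistent(union cpu_ent *cpu)
	{
		struct thread *owner = cpu->info.fpu_state_thread;

		// whenever a CPU claims to hold a thread's FPU state, that
		// thread must point back at exactly this CPU
		if (owner != NULL)
			ASSERT(owner->fpu_cpu == cpu);
	}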

View File

@@ -43,7 +43,8 @@ struct set_mtrr_parameter {
 extern void reboot(void);
 	// from arch_x86.S
 
-void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
+void (*gX86SaveFPUFunc)(void *state);
+void (*gX86RestoreFPUFunc)(const void *state);
 bool gHasSSE = false;
 
 static struct tss **sTSS;
@@ -65,7 +66,6 @@ struct tss *
 x86_get_main_tss(void)
 {
 	int cpuNum = smp_get_current_cpu();
-
 	return sTSS[cpuNum];
 }
@@ -184,9 +184,9 @@ init_sse(void)
 	// enable OS support for SSE
 	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
 	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
-	gX86SwapFPUFunc = i386_fxsave_swap;
+	gX86SaveFPUFunc = i386_fxsave;
+	gX86RestoreFPUFunc = i386_fxrstor;
 	gHasSSE = true;
 }
@@ -237,7 +237,8 @@ status_t
 arch_cpu_preboot_init(kernel_args *args)
 {
 	write_dr3(0);
-	gX86SwapFPUFunc = i386_fsave_swap;
+	gX86SaveFPUFunc = i386_fnsave;
+	gX86RestoreFPUFunc = i386_frstor;
 
 	return B_OK;
 }
@@ -332,6 +333,9 @@ arch_cpu_init_post_vm(kernel_args *args)
 			DT_DATA_WRITEABLE, DPL_USER);
 	}
 
+	// enable lazy FPU state
+	x86_write_cr0((x86_read_cr0() & ~CR0_FPU_EMULATION) | CR0_MONITOR_FPU);
+
 	// setup SSE2/3 support
 	init_sse();
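For reference, the CR0 bits involved sit at the architectural positions below (the kernel's macro values are assumed to match, consistent with the CR0_TASK_SWITCHED definition added in arch_thread.c further down). Clearing EM lets FPU instructions execute natively, setting MP makes WAIT/FWAIT honor TS, and TS itself is what turns the first FPU instruction after a context switch into a #NM fault:

	#define CR0_MONITOR_FPU		(1UL << 1)	// MP: WAIT/FWAIT fault when TS is set
	#define CR0_FPU_EMULATION	(1UL << 2)	// EM: fault on every FPU instruction
	#define CR0_TASK_SWITCHED	(1UL << 3)	// TS: fault on next FPU use

	// enable lazy FPU switching: run the FPU natively (EM off), but let
	// TS-based #NM faults act as the hook for deferred state migration
	x86_write_cr0((x86_read_cr0() & ~CR0_FPU_EMULATION) | CR0_MONITOR_FPU);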

View File

@@ -6,6 +6,7 @@
  * Distributed under the terms of the NewOS License.
  */
 
+#include <cpu.h>
 #include <int.h>
 #include <kscheduler.h>
 #include <ksyscalls.h>
@@ -36,6 +37,10 @@
 #	define TRACE(x) ;
 #endif
 
+// from arch_cpu.c
+extern void (*gX86SaveFPUFunc)(void *state);
+extern void (*gX86RestoreFPUFunc)(const void *state);
+
 // Definitions for the PIC 8259 controller
 // (this is not a complete list, only what we're actually using)
@@ -353,8 +358,7 @@ i386_handle_trap(struct iframe frame)
 	switch (frame.vector) {
 		// fatal exceptions
 		case 2:		// NMI Interrupt
-		case 7:		// Device Not Available Exception (#NM)
 		case 9:		// Coprocessor Segment Overrun
 		case 10:	// Invalid TSS Exception (#TS)
 		case 11:	// Segment Not Present (#NP)
@@ -363,6 +367,25 @@ i386_handle_trap(struct iframe frame)
 			fatal_exception(&frame);
 			break;
 
+		case 7:		// Device Not Available Exception (#NM)
+		{
+			// raised to lazily save and restore FPU states
+			cpu_ent *cpu = get_cpu_struct();
+			asm volatile ("clts;");
+
+			if (cpu->info.fpu_state_thread != thread) {
+				if (cpu->info.fpu_state_thread) {
+					gX86SaveFPUFunc(cpu->info.fpu_state_thread->arch_info.fpu_state);
+					cpu->info.fpu_state_thread->fpu_cpu = NULL;
+				}
+
+				gX86RestoreFPUFunc(thread->arch_info.fpu_state);
+				cpu->info.fpu_state_thread = thread;
+				thread->fpu_cpu = cpu;
+			}
+			break;
+		}
+
 		case 8:		// Double Fault Exception (#DF)
 		{
 			struct tss *tss = x86_get_main_tss();
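Two details of this handler are worth spelling out: #NM is reported as a fault, so when the handler returns, the interrupted FPU instruction restarts and now finds the right state loaded; and the clts must come first, because frstor/fxrstor are themselves FPU instructions and would re-raise #NM while CR0.TS is still set. A sketch of that ordering constraint (illustration with a hypothetical wrapper, not kernel code):

	static void
	load_fpu_state(const void *state)
	{
		// order matters: clear CR0.TS first, because the restore below
		// is itself an FPU instruction and would fault again otherwise
		asm volatile ("clts");
		gX86RestoreFPUFunc(state);
	}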

View File

@@ -28,12 +28,13 @@
 #endif
 
+#define CR0_TASK_SWITCHED	(1UL << 3)
+
 // from arch_interrupts.S
 extern void i386_stack_init(struct farcall *interrupt_stack_offset);
 extern void i386_restore_frame_from_syscall(struct iframe frame);
 
 // from arch_cpu.c
-extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
 extern bool gHasSSE;
 
 static struct arch_thread sInitialState _ALIGNED(16);
@@ -46,11 +47,11 @@ arch_thread_init(struct kernel_args *args)
 	// save one global valid FPU state; it will be copied in the arch dependent
 	// part of each new thread
-	asm("fninit");
+	asm volatile ("clts; fninit; fnclex;");
 	if (gHasSSE)
 		i386_fxsave(sInitialState.fpu_state);
 	else
-		i386_fsave(sInitialState.fpu_state);
+		i386_fnsave(sInitialState.fpu_state);
 
 	// let the asm function know the offset to the interrupt stack within struct thread
 	// I know no better ( = static) way to tell the asm function the offset
@@ -285,7 +286,7 @@ arch_thread_context_switch(struct thread *from, struct thread *to)
 	if (to->team->address_space != NULL)
 		i386_reinit_user_debug_after_context_switch(to);
 
-	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
+	x86_write_cr0(x86_read_cr0() | CR0_TASK_SWITCHED);
 	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
 }
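This is the core of the change on the switch path: the eager swap moved the whole save area (108 bytes for fsave, 512 for fxsave) on every context switch, whether or not either thread used the FPU; now the switch only sets one CR0 bit, and the copy is deferred to the #NM handler, paid only by threads that actually execute FPU instructions. Side by side, with bodies condensed (the two function names here are illustrative only):

	static void
	switch_fpu_eagerly(struct thread *from, struct thread *to)	// old behavior
	{
		// unconditional 108/512-byte save + restore on every switch
		gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	}

	static void
	switch_fpu_lazily(void)	// new behavior
	{
		// constant time; defers the copy to the first FPU use, if any
		x86_write_cr0(x86_read_cr0() | CR0_TASK_SWITCHED);
	}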
@@ -366,7 +367,7 @@ arch_setup_signal_frame(struct thread *t, struct sigaction *sa, int sig, int sig
 	regs._reserved_2[0] = frame->edi;
 	regs._reserved_2[1] = frame->esi;
 	regs._reserved_2[2] = frame->ebp;
-	i386_fsave((void *)(&regs.xregs));
+	i386_fnsave((void *)(&regs.xregs));
 
 	status = user_memcpy(stack_ptr, &regs, sizeof(regs));
 	if (status < B_OK)

View File

@@ -122,7 +122,7 @@ arch_get_debug_cpu_state(struct debug_cpu_state *cpuState)
 	if (struct iframe *frame = i386_get_user_iframe()) {
 		struct thread *thread = thread_get_current_thread();
 
-		i386_fsave(cpuState->extended_regs);
+		i386_fnsave(cpuState->extended_regs);
 		// For this to be correct the calling function must not use these
 		// registers (not even indirectly).

View File

@@ -21,10 +21,10 @@ FUNCTION(arch_cpu_user_TLB_invalidate):
 	movl	%eax,%cr3
 	ret
 
-/* void i386_fsave(void *fpu_state); */
-FUNCTION(i386_fsave):
+/* void i386_fnsave(void *fpu_state); */
+FUNCTION(i386_fnsave):
 	movl	4(%esp), %eax
-	fsave	(%eax)
+	fnsave	(%eax)
 	ret
 
 /* void i386_fxsave(void *fpu_state); */
@@ -45,22 +45,6 @@ FUNCTION(i386_fxrstor):
 	fxrstor	(%eax)
 	ret
 
-/* void i386_fsave_swap(void *old_fpu_state, const void *new_fpu_state); */
-FUNCTION(i386_fsave_swap):
-	movl	4(%esp),%eax
-	fsave	(%eax)
-	movl	8(%esp),%eax
-	frstor	(%eax)
-	ret
-
-/* void i386_fxsave_swap(void *old_fpu_state, const void *new_fpu_state); */
-FUNCTION(i386_fxsave_swap):
-	movl	4(%esp),%eax
-	fxsave	(%eax)
-	movl	8(%esp),%eax
-	fxrstor	(%eax)
-	ret
-
 /* uint32 x86_read_ebp(); */
 FUNCTION(x86_read_ebp):
	movl	%ebp, %eax
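The fsave → fnsave switch matters here: fsave is an fnsave with an implicit wait in front, and that wait would deliver any pending x87 exception (#MF) in the middle of the kernel's save path, while the no-wait form stores the state unconditionally (and then leaves the FPU reinitialized, which is harmless since the state is reloaded before its next use). The swap functions can go because save and restore are now independent steps in the #NM handler rather than one per-switch operation. A hedged C rendering of the distinction — the real implementations stay in assembly, and these wrapper names are illustrative:

	static inline void
	fpu_save_checked(void *state)
	{
		// fsave = wait + fnsave: the implicit wait raises any pending
		// x87 exception (#MF) first -- unwanted inside the kernel
		asm volatile ("fsave %0" : "=m" (*(char (*)[108])state));
	}

	static inline void
	fpu_save_unchecked(void *state)
	{
		// fnsave stores the state without the exception check, then
		// reinitializes the FPU (as if by fninit)
		asm volatile ("fnsave %0" : "=m" (*(char (*)[108])state));
	}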

View File

@@ -872,6 +872,12 @@ thread_exit2(void *_args)
 		user_debug_thread_deleted(args.original_team_id, args.thread->id);
 	}
 
+	// remove us as the fpu state owner if we are one
+	if (args.thread->fpu_cpu) {
+		args.thread->fpu_cpu->info.fpu_state_thread = NULL;
+		args.thread->fpu_cpu = NULL;
+	}
+
 	// return the death stack and reschedule one last time
 	put_death_stack_and_reschedule(args.death_stack);
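Without this cleanup, a CPU could keep a dangling fpu_state_thread pointer past a thread's death, and the next #NM on that CPU would then fnsave into freed memory. The scenario the hunk closes, step by step (illustration only):

	// 1. thread T uses the FPU on CPU0; CPU0.info.fpu_state_thread == T
	// 2. T exits and its thread structure is freed
	// 3. some other thread raises #NM on CPU0
	// 4. the handler would call gX86SaveFPUFunc(T->arch_info.fpu_state),
	//    writing into freed memory
	//
	// clearing both links in thread_exit2 makes step 4 take the
	// "no previous owner" branch instead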