Implemented lazy FPU state save/restore. In the end mostly ported from NewOS. SMP safe.
git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@17251 a95241bf-73f2-0310-859d-f6bbb57e9c96
commit 7eee76e65a
parent fdfbd66456
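For orientation before the diff, here is a minimal user-space C sketch of the bookkeeping this commit introduces. It is not the kernel code: fake_cpu, fake_thread, context_switch and fpu_use are made-up names, and the printf calls stand in for the real gX86SaveFPUFunc/gX86RestoreFPUFunc save and restore. The idea is that a context switch no longer copies FPU state; it only sets CR0.TS, so the first FPU instruction of the newly running thread faults with #NM, and the trap handler moves the state and records which thread owns the per-CPU FPU contents.

	/* Illustrative model of the lazy FPU hand-off implemented below. */
	#include <stdio.h>

	struct fake_cpu;

	struct fake_thread {
		const char *name;
		struct fake_cpu *fpu_cpu;	/* CPU that currently holds our FPU state, if any */
		char fpu_state[512];		/* stands in for the fnsave/fxsave area */
	};

	struct fake_cpu {
		struct fake_thread *fpu_state_thread;	/* thread whose FPU state this CPU holds */
		int task_switched;			/* stands in for the CR0.TS bit */
	};

	/* Models what arch_thread_context_switch() does after this change:
	 * no FPU copy, just mark the FPU "not available" so the next use traps. */
	static void context_switch(struct fake_cpu *cpu)
	{
		cpu->task_switched = 1;
	}

	/* Models the new #NM handler: on the first FPU use after a switch, save the
	 * previous owner's state, restore the current thread's, record ownership. */
	static void fpu_use(struct fake_cpu *cpu, struct fake_thread *thread)
	{
		if (!cpu->task_switched)
			return;				/* no #NM trap would be raised */

		cpu->task_switched = 0;			/* the real handler executes clts */
		if (cpu->fpu_state_thread != thread) {
			if (cpu->fpu_state_thread != NULL) {
				/* gX86SaveFPUFunc stand-in */
				printf("save FPU state of %s\n", cpu->fpu_state_thread->name);
				cpu->fpu_state_thread->fpu_cpu = NULL;
			}
			/* gX86RestoreFPUFunc stand-in */
			printf("restore FPU state of %s\n", thread->name);
			cpu->fpu_state_thread = thread;
			thread->fpu_cpu = cpu;
		}
	}

	int main(void)
	{
		struct fake_cpu cpu = { NULL, 1 };
		struct fake_thread a = { "A", NULL, { 0 } };
		struct fake_thread b = { "B", NULL, { 0 } };

		fpu_use(&cpu, &a);	/* first FPU use: restore A's state */
		context_switch(&cpu);
		fpu_use(&cpu, &b);	/* save A, restore B */
		context_switch(&cpu);
		fpu_use(&cpu, &b);	/* B still owns the FPU: clear TS only, no copy */
		return 0;
	}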
@@ -104,12 +104,10 @@ void i386_enter_uspace(addr_t entry, void *args1, void *args2, addr_t ustack_top
 void i386_set_tss_and_kstack(addr_t kstack);
 void i386_switch_stack_and_call(addr_t stack, void (*func)(void *), void *arg);
 void i386_swap_pgdir(addr_t new_pgdir);
-void i386_fsave(void *fpu_state);
+void i386_fnsave(void *fpu_state);
 void i386_fxsave(void *fpu_state);
 void i386_frstor(const void *fpu_state);
 void i386_fxrstor(const void *fpu_state);
-void i386_fsave_swap(void *old_fpu_state, const void *new_fpu_state);
-void i386_fxsave_swap(void *old_fpu_state, const void *new_fpu_state);
 uint32 x86_read_ebp();
 uint32 x86_read_cr0();
 void x86_write_cr0(uint32 value);
@@ -10,6 +10,7 @@

 #include <smp.h>
+#include <thread.h>
 #include <timer.h>
 #include <boot/kernel_args.h>

@@ -24,6 +25,9 @@ typedef union cpu_ent {
 	int preempted;
 	timer quantum_timer;

+	// tells which thread's fpu state we hold
+	struct thread *fpu_state_thread;
+
 	// keeping track of CPU activity
 	bigtime_t active_time;
 	bigtime_t last_kernel_time;
@@ -139,6 +139,7 @@ struct thread {
 	int32 state;
 	int32 next_state;
 	union cpu_ent *cpu;
+	union cpu_ent *fpu_cpu;	/* this cpu holds our fpu state */

 	sigset_t sig_pending;
 	sigset_t sig_block_mask;
@@ -43,7 +43,8 @@ struct set_mtrr_parameter {
 extern void reboot(void);
 	// from arch_x86.S

-void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
+void (*gX86SaveFPUFunc)(void *state);
+void (*gX86RestoreFPUFunc)(const void *state);
 bool gHasSSE = false;

 static struct tss **sTSS;
@@ -65,7 +66,6 @@ struct tss *
 x86_get_main_tss(void)
 {
 	int cpuNum = smp_get_current_cpu();

 	return sTSS[cpuNum];
 }
@@ -184,9 +184,9 @@ init_sse(void)

 	// enable OS support for SSE
 	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
 	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));

-	gX86SwapFPUFunc = i386_fxsave_swap;
+	gX86SaveFPUFunc = i386_fxsave;
+	gX86RestoreFPUFunc = i386_fxrstor;
 	gHasSSE = true;
 }
@@ -237,7 +237,8 @@ status_t
 arch_cpu_preboot_init(kernel_args *args)
 {
 	write_dr3(0);
-	gX86SwapFPUFunc = i386_fsave_swap;
+	gX86SaveFPUFunc = i386_fnsave;
+	gX86RestoreFPUFunc = i386_frstor;

 	return B_OK;
 }
@@ -332,6 +333,9 @@ arch_cpu_init_post_vm(kernel_args *args)
 			DT_DATA_WRITEABLE, DPL_USER);
 	}

+	// enable lazy FPU state
+	x86_write_cr0((x86_read_cr0() & ~CR0_FPU_EMULATION) | CR0_MONITOR_FPU);
+
 	// setup SSE2/3 support
 	init_sse();

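For reference, these are the standard x86 CR0 bits involved in the two CR0 writes above and in the CR0_TASK_SWITCHED define later in the diff. The values shown are the architectural bit positions; the kernel keeps its own CR0_* macros in its private headers, so this is only a sketch:

	/* Architectural x86 CR0 bits used by lazy FPU switching. */
	#define CR0_MONITOR_FPU		(1UL << 1)	/* MP: WAIT/FWAIT also fault while TS is set */
	#define CR0_FPU_EMULATION	(1UL << 2)	/* EM: no x87 present, every FPU insn raises #NM */
	#define CR0_TASK_SWITCHED	(1UL << 3)	/* TS: next x87/MMX/SSE insn raises #NM */

With EM clear and TS set, the first FPU instruction executed by the next thread raises #NM; the handler clears TS with clts and then performs the deferred state swap.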
@@ -6,6 +6,7 @@
  * Distributed under the terms of the NewOS License.
  */

+#include <cpu.h>
 #include <int.h>
 #include <kscheduler.h>
 #include <ksyscalls.h>
@@ -36,6 +37,10 @@
 #	define TRACE(x) ;
 #endif

+// from arch_cpu.c
+extern void (*gX86SaveFPUFunc)(void *state);
+extern void (*gX86RestoreFPUFunc)(const void *state);
+
 // Definitions for the PIC 8259 controller
 // (this is not a complete list, only what we're actually using)

@@ -353,8 +358,7 @@ i386_handle_trap(struct iframe frame)
 	switch (frame.vector) {
 		// fatal exceptions

-		case 2:		// NMI Interrupt
-		case 7:		// Device Not Available Exception (#NM)
+		case 2:		// NMI Interrupt
 		case 9:		// Coprocessor Segment Overrun
 		case 10:	// Invalid TSS Exception (#TS)
 		case 11:	// Segment Not Present (#NP)
@@ -363,6 +367,25 @@ i386_handle_trap(struct iframe frame)
 			fatal_exception(&frame);
 			break;

+		case 7:		// Device Not Available Exception (#NM)
+		{
+			// raised to lazily save and restore FPU states
+			cpu_ent *cpu = get_cpu_struct();
+
+			asm volatile ("clts;");
+			if (cpu->info.fpu_state_thread != thread) {
+				if (cpu->info.fpu_state_thread) {
+					gX86SaveFPUFunc(cpu->info.fpu_state_thread->arch_info.fpu_state);
+					cpu->info.fpu_state_thread->fpu_cpu = NULL;
+				}
+
+				gX86RestoreFPUFunc(thread->arch_info.fpu_state);
+				cpu->info.fpu_state_thread = thread;
+				thread->fpu_cpu = cpu;
+			}
+			break;
+		}
+
 		case 8:		// Double Fault Exception (#DF)
 		{
 			struct tss *tss = x86_get_main_tss();
@@ -28,12 +28,13 @@
 #endif


+#define CR0_TASK_SWITCHED (1UL << 3)
+
 // from arch_interrupts.S
 extern void i386_stack_init(struct farcall *interrupt_stack_offset);
 extern void i386_restore_frame_from_syscall(struct iframe frame);

 // from arch_cpu.c
-extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
 extern bool gHasSSE;

 static struct arch_thread sInitialState _ALIGNED(16);
@@ -46,11 +47,11 @@ arch_thread_init(struct kernel_args *args)
 	// save one global valid FPU state; it will be copied in the arch dependent
 	// part of each new thread

-	asm("fninit");
+	asm volatile ("clts; fninit; fnclex;");
 	if (gHasSSE)
 		i386_fxsave(sInitialState.fpu_state);
 	else
-		i386_fsave(sInitialState.fpu_state);
+		i386_fnsave(sInitialState.fpu_state);

 	// let the asm function know the offset to the interrupt stack within struct thread
 	// I know no better ( = static) way to tell the asm function the offset
@@ -285,7 +286,7 @@ arch_thread_context_switch(struct thread *from, struct thread *to)
 	if (to->team->address_space != NULL)
 		i386_reinit_user_debug_after_context_switch(to);

-	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
+	x86_write_cr0(x86_read_cr0() | CR0_TASK_SWITCHED);
 	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
 }

@@ -366,7 +367,7 @@ arch_setup_signal_frame(struct thread *t, struct sigaction *sa, int sig, int sig
 	regs._reserved_2[0] = frame->edi;
 	regs._reserved_2[1] = frame->esi;
 	regs._reserved_2[2] = frame->ebp;
-	i386_fsave((void *)(&regs.xregs));
+	i386_fnsave((void *)(&regs.xregs));

 	status = user_memcpy(stack_ptr, &regs, sizeof(regs));
 	if (status < B_OK)
@@ -122,7 +122,7 @@ arch_get_debug_cpu_state(struct debug_cpu_state *cpuState)
 	if (struct iframe *frame = i386_get_user_iframe()) {
 		struct thread *thread = thread_get_current_thread();

-		i386_fsave(cpuState->extended_regs);
+		i386_fnsave(cpuState->extended_regs);
 			// For this to be correct the calling function must not use these
 			// registers (not even indirectly).

@@ -21,10 +21,10 @@ FUNCTION(arch_cpu_user_TLB_invalidate):
 	movl	%eax, %cr3
 	ret

-/* void i386_fsave(void *fpu_state); */
-FUNCTION(i386_fsave):
+/* void i386_fnsave(void *fpu_state); */
+FUNCTION(i386_fnsave):
 	movl	4(%esp), %eax
-	fsave	(%eax)
+	fnsave	(%eax)
 	ret

 /* void i386_fxsave(void *fpu_state); */
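A note on the i386_fsave to i386_fnsave rename in this hunk: fsave is effectively fwait followed by fnsave, so it first reports any pending unmasked x87 exception, while fnsave stores (and then reinitializes) the FPU without that check, which is preferable when the state being saved may belong to another thread. A hedged GCC-style C equivalent of the assembly routine, with fpu_fnsave as an illustrative name rather than a kernel function:

	#include <stdint.h>

	/* Save the 108-byte x87 state without waiting for pending exceptions,
	 * mirroring what FUNCTION(i386_fnsave) does in pure assembly. */
	static inline void fpu_fnsave(void *fpu_state)
	{
		__asm__ __volatile__("fnsave %0" : "=m" (*(uint8_t (*)[108])fpu_state));
	}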
@@ -45,22 +45,6 @@ FUNCTION(i386_fxrstor):
 	fxrstor	(%eax)
 	ret

-/* void i386_fsave_swap(void *old_fpu_state, const void *new_fpu_state); */
-FUNCTION(i386_fsave_swap):
-	movl	4(%esp), %eax
-	fsave	(%eax)
-	movl	8(%esp), %eax
-	frstor	(%eax)
-	ret
-
-/* void i386_fxsave_swap(void *old_fpu_state, const void *new_fpu_state); */
-FUNCTION(i386_fxsave_swap):
-	movl	4(%esp), %eax
-	fxsave	(%eax)
-	movl	8(%esp), %eax
-	fxrstor	(%eax)
-	ret
-
 /* uint32 x86_read_ebp(); */
 FUNCTION(x86_read_ebp):
 	movl	%ebp, %eax
@@ -872,6 +872,12 @@ thread_exit2(void *_args)
 		user_debug_thread_deleted(args.original_team_id, args.thread->id);
 	}

+	// remove us as the fpu state owner if we are one
+	if (args.thread->fpu_cpu) {
+		args.thread->fpu_cpu->info.fpu_state_thread = NULL;
+		args.thread->fpu_cpu = NULL;
+	}
+
 	// return the death stack and reschedule one last time
 	put_death_stack_and_reschedule(args.death_stack);
