Implemented SSE2/3 support (tested with VLC).

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16569 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2006-03-02 17:12:56 +00:00
parent e11882b484
commit f94b06f992
6 changed files with 62 additions and 18 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
* Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
* Distributed under the terms of the MIT License.
*
* Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
@ -24,7 +24,8 @@
// cpuid eax 1 features
// (single-bit masks, tested against cpuid_info.eax_1.features)
#define IA32_FEATURE_MTRR (1UL << 12)
#define IA32_FEATURE_GLOBAL_PAGES (1UL << 13)
// init_sse() in arch_cpu.c only enables SSE if both of the following
// feature bits are reported
#define IA32_FEATURE_SSE (1UL << 25)
#define IA32_FEATURE_FXSR (1UL << 24)
// cr4 flags
#define IA32_CR4_GLOBAL_PAGES (1UL << 7)

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2005, The Haiku Team. All rights reserved.
* Copyright 2002-2006, The Haiku Team. All rights reserved.
* Distributed under the terms of the MIT License.
*
* Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
@ -11,6 +11,8 @@
#include <arch_cpu.h>
#define _ALIGNED(bytes) __attribute__((aligned(bytes)))
// move this to somewhere else, maybe BeBuild.h?
struct farcall {
uint32 *esp;
@ -29,12 +31,12 @@ struct arch_thread {
struct farcall current_stack;
struct farcall interrupt_stack;
// 512 byte floating point save point - this must be 16 byte aligned
uint8 fpu_state[512];
// used to track interrupts on this thread
struct iframe_stack iframes;
// 512 byte floating point save point
uint8 fpu_state[512];
};
} _ALIGNED(16);
struct arch_team {
// gcc treats empty structures as zero-length in C, but as if they contain

View File

@ -12,6 +12,7 @@
#include <tls.h>
#include <vm.h>
#include <arch_system_info.h>
#include <arch/cpu.h>
#include <arch/x86/selector.h>
#include <boot/kernel_args.h>
@ -25,6 +26,11 @@
// cr0 bit masks
#define CR0_CACHE_DISABLE (1UL << 30)
#define CR0_NOT_WRITE_THROUGH (1UL << 29)
// the next two are cleared in cr0 by init_sse() when SSE gets enabled
#define CR0_FPU_EMULATION (1UL << 2)
#define CR0_MONITOR_FPU (1UL << 1)
// cr4 bit masks; both are set by init_sse() to enable OS support for SSE
#define CR4_OS_FXSR (1UL << 9)
#define CR4_OS_XMM_EXCEPTION (1UL << 10)
struct set_mtrr_parameter {
int32 index;
@ -37,6 +43,9 @@ struct set_mtrr_parameter {
extern void reboot(void);
// from arch_x86.S

// Function used to swap the FPU state on a thread context switch; it is
// called with the fpu_state buffers of the old and the new thread.
// arch_cpu_preboot_init() sets it to i386_fsave_swap, and init_sse()
// replaces it with i386_fxsave_swap when SSE support has been enabled.
void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
// Set to true by init_sse() once SSE support has been found and enabled;
// arch_thread_init() uses it to choose between i386_fxsave() and
// i386_fsave() for the initial FPU state.
bool gHasSSE = false;

static struct tss **sTSS;
//static struct tss **sDoubleFaultTSS;
struct tss **sDoubleFaultTSS;
@ -162,6 +171,26 @@ x86_get_mtrr(uint32 index, uint64 *_base, uint64 *_length, uint8 *_type)
}
/*
 * Enables SSE support on the CPU this function runs on, provided that cpuid
 * function 1 reports both the SSE and the FXSR feature.
 * If cpuid cannot be queried, or either feature bit is missing, the control
 * registers as well as gX86SwapFPUFunc and gHasSSE are left untouched.
 */
static void
init_sse(void)
{
	cpuid_info info;

	if (get_current_cpuid(&info, 1) != B_OK)
		return;

	// without both feature bits we don't have proper SSE support
	if ((info.eax_1.features & IA32_FEATURE_SSE) == 0)
		return;
	if ((info.eax_1.features & IA32_FEATURE_FXSR) == 0)
		return;

	// enable OS support for SSE: set the FXSR and XMM exception bits in cr4,
	// and clear the FPU emulation and monitor bits in cr0
	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));

	// from here on, context switches swap the FPU state via the fxsave
	// variant, and gHasSSE tells other code that SSE is available
	gX86SwapFPUFunc = i386_fxsave_swap;
	gHasSSE = true;
}
static void
load_tss(void *data, int cpu)
{
@ -201,10 +230,15 @@ init_double_fault(int cpuNum)
}
// #pragma mark -
/*
 * CPU setup hook (judging by its name, run before the rest of the boot
 * process - TODO confirm against the caller).
 * Passes 0 to write_dr3() and installs i386_fsave_swap as the FPU state swap
 * function; init_sse() replaces the latter with i386_fxsave_swap when SSE
 * support has been enabled.
 * The "args" parameter is not used. Always returns B_OK.
 */
status_t
arch_cpu_preboot_init(kernel_args *args)
{
// presumably resets debug register 3 - verify against write_dr3()
write_dr3(0);
// default to the fsave variant until init_sse() decides otherwise
gX86SwapFPUFunc = i386_fsave_swap;
return B_OK;
}
@ -298,6 +332,9 @@ arch_cpu_init_post_vm(kernel_args *args)
DT_DATA_WRITEABLE, DPL_USER);
}
// setup SSE2/3 support
init_sse();
return B_OK;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de.
* Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
* Distributed under the terms of the MIT License.
*
* Copyright 2001, Travis Geiselbrecht. All rights reserved.
@ -235,9 +235,9 @@ arch_int_disable_io_interrupt(int irq)
// disable PIC 8259 controlled interrupt
if (irq < 8)
out8(in8(0x21) | (1 << irq), 0x21);
out8(in8(PIC_MASTER_MASK) | (1 << irq), PIC_MASTER_MASK);
else
out8(in8(0xa1) | (1 << (irq - 8)), 0xa1);
out8(in8(PIC_SLAVE_MASK) | (1 << (irq - 8)), PIC_SLAVE_MASK);
}
@ -351,7 +351,6 @@ i386_handle_trap(struct iframe frame)
// dprintf("i386_handle_trap: vector 0x%x, ip 0x%x, cpu %d\n", frame.vector, frame.eip, smp_get_current_cpu());
switch (frame.vector) {
// fatal exceptions
case 2: // NMI Interrupt
@ -577,6 +576,7 @@ arch_int_init(kernel_args *args)
set_intr_gate(16, &trap16);
set_intr_gate(17, &trap17);
set_intr_gate(18, &trap18);
set_intr_gate(19, &trap19);
set_intr_gate(32, &trap32);
set_intr_gate(33, &trap33);

View File

@ -32,10 +32,11 @@
extern void i386_stack_init(struct farcall *interrupt_stack_offset);
extern void i386_restore_frame_from_syscall(struct iframe frame);
// from arch_cpu.c
extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
extern bool gHasSSE;
static struct arch_thread sInitialState;
// ToDo:
// __attribute__ ((aligned(16)));
static struct arch_thread sInitialState _ALIGNED(16);
// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
@ -46,7 +47,9 @@ arch_thread_init(struct kernel_args *args)
// part of each new thread
asm("fninit");
// ToDo: add MMX/SSE support (ie. use fxsave)
if (gHasSSE)
i386_fxsave(sInitialState.fpu_state);
else
i386_fsave(sInitialState.fpu_state);
// let the asm function know the offset to the interrupt stack within struct thread
@ -282,7 +285,7 @@ arch_thread_context_switch(struct thread *from, struct thread *to)
if (to->team->address_space != NULL)
i386_reinit_user_debug_after_context_switch(to);
i386_fsave_swap(from->arch_info.fpu_state, to->arch_info.fpu_state);
gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
* Copyright 2005-2006, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
* Distributed under the terms of the MIT License.
*
* Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
@ -16,6 +16,7 @@ extern "C" {
void trap0();void trap1();void trap2();void trap3();void trap4();void trap5();
void trap6();void trap7();void trap9();void trap10();void trap11();
void trap12();void trap13();void trap14();void trap16();void trap17();void trap18();
void trap19();
void trap32();void trap33();void trap34();void trap35();void trap36();void trap37();
void trap38();void trap39();void trap40();void trap41();void trap42();void trap43();
void trap44();void trap45();void trap46();void trap47();