kernel/x86_64: AVX support

xsave or xsavec are supported. This breaks vregs compatibility. The thread
structure object cache alignment is changed to 64 bytes. The xsave fpu_state
size isn't architecturally defined (it is for instance 832 bytes here), so
1024 bytes are reserved.

Change-Id: I4a0cab0bc42c1d37f24dcafb8259f8ff24a330d2
Reviewed-on: https://review.haiku-os.org/c/haiku/+/2849
Reviewed-by: Adrien Destugues <pulkomandy@gmail.com>
commit 9495126984
parent 734c1e0491
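For context on the size mentioned in the commit message: the xsave area size is not fixed by the architecture but reported by CPUID leaf 0xD, and it grows with the features enabled in XCR0. A minimal sketch of the query this commit performs later in arch_cpu_init_post_vm(), reusing the cpuid_info/get_current_cpuid helpers visible in the diff (the value is an example, not a guarantee):

	cpuid_info cpuid;
	get_current_cpuid(&cpuid, 0xd, 0);
	uint64 fpuSaveLength = cpuid.regs.ebx;
		// e.g. 832 bytes with x87 + SSE + AVX enabled, hence the
		// 1024-byte fpu_state buffer reserved in arch_thread below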
@@ -89,6 +89,18 @@ struct fpu_state {
 	unsigned char	_reserved_416_511[96];
 };
 
+struct xstate_hdr {
+	unsigned long	bv;
+	unsigned long	xcomp_bv;
+	unsigned char	_reserved[48];
+};
+
+struct savefpu {
+	struct fpu_state	fp_fxsave;
+	struct xstate_hdr	fp_xstate;
+	unsigned long		fp_ymm[16][2];
+};
+
 struct vregs {
 	unsigned long		rax;
 	unsigned long		rbx;
@@ -110,7 +122,7 @@ struct vregs {
 	unsigned long		rip;
 	unsigned long		rflags;
 
-	struct fpu_state	fpu;
+	struct savefpu		fpu;
 };
 
 
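To make the vregs compatibility break concrete, here is a hedged compile-time check (assuming the usual packing with no padding; this is not part of the patch): the fpu member grows from the 512-byte legacy fxsave image to the full savefpu layout.

	static_assert(sizeof(struct fpu_state) == 512, "legacy fxsave area");
	static_assert(sizeof(struct xstate_hdr) == 64, "xstate header");
	static_assert(sizeof(struct savefpu) == 512 + 64 + 16 * 16,
		"832 bytes in total, the size quoted in the commit message");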
@@ -22,6 +22,8 @@
 
 #define ALTCODEPATCH_TAG_STAC		1
 #define ALTCODEPATCH_TAG_CLAC		2
+#define ALTCODEPATCH_TAG_XSAVE		3
+#define ALTCODEPATCH_TAG_XRSTOR		4
 
 
 #ifdef _ASSEMBLER
@@ -354,9 +354,15 @@
 #define IA32_CR4_GLOBAL_PAGES	(1UL << 7)
 #define CR4_OS_FXSR				(1UL << 9)
 #define CR4_OS_XMM_EXCEPTION	(1UL << 10)
+#define IA32_CR4_OSXSAVE		(1UL << 18)
 #define IA32_CR4_SMEP			(1UL << 20)
 #define IA32_CR4_SMAP			(1UL << 21)
 
+// Extended Control Register XCR0 flags
+#define IA32_XCR0_X87			(1UL << 0)
+#define IA32_XCR0_SSE			(1UL << 1)
+#define IA32_XCR0_AVX			(1UL << 2)
+
 // page fault error codes (http://wiki.osdev.org/Page_Fault)
 #define PGFAULT_P				0x01	// Protection violation
 #define PGFAULT_W				0x02	// Write
@@ -547,6 +553,16 @@ struct intel_microcode_extended_signature {
 #define clear_ac() \
 	__asm__ volatile (ASM_CLAC : : : "memory")
 
+#define xgetbv(reg) ({ \
+	uint32 low, high; \
+	__asm__ volatile ("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); \
+	(low | (uint64)high << 32); \
+})
+
+#define xsetbv(reg, value) { \
+	uint32 low = value; uint32 high = value >> 32; \
+	__asm__ volatile ("xsetbv" : : "a" (low), "d" (high), "c" (reg)); }
+
 #define out8(value,port) \
 	__asm__ ("outb %%al,%%dx" : : "a" (value), "d" (port))
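As a usage illustration (not part of the change), reading XCR0 with the new xgetbv() macro and testing the AVX state bit could look like this, reusing the IA32_XCR0_* constants added above; this assumes CR4.OSXSAVE has already been set:

	uint64 xcr0 = xgetbv(0);	// register 0 selects XCR0
	bool avxStateEnabled = (xcr0 & IA32_XCR0_AVX) != 0;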
@@ -53,8 +53,13 @@ struct arch_thread {
 	struct farcall	interrupt_stack;
 #endif
 
+#ifndef __x86_64__
 	// 512 byte floating point save point - this must be 16 byte aligned
 	uint8			fpu_state[512] _ALIGNED(16);
+#else
+	// floating point save point - this must be 64 byte aligned for xsave
+	uint8			fpu_state[1024] _ALIGNED(64);
+#endif
 
 	addr_t GetFramePointer() const;
 } _ALIGNED(16);
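Because fpu_state is now declared _ALIGNED(64) inside arch_thread, every Thread allocation has to come from 64-byte-aligned storage, which is what the object-cache change at the end of this commit provides. A hedged compile-time check, illustrative only:

	static_assert(alignof(struct arch_thread) >= 64,
		"the xsave area embedded in arch_thread needs 64-byte alignment");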
@@ -119,3 +119,16 @@ FUNCTION_END(_stac)
 FUNCTION(_clac):
 	clac
 FUNCTION_END(_clac)
+
+FUNCTION(_xsave):
+	xsave64 (%rdi)
+FUNCTION_END(_xsave)
+
+FUNCTION(_xsavec):
+	xsavec64 (%rdi)
+FUNCTION_END(_xsavec)
+
+FUNCTION(_xrstor):
+	xrstor64 (%rdi)
+FUNCTION_END(_xrstor)
+
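A hypothetical aside, not part of the patch: the 64-bit save/restore instructions above all encode to exactly 4 bytes, the same length as the fxsaveq/fxrstorq instructions they replace, which is why arch_cpu_init_post_vm() later passes a length of 4 to arch_altcodepatch_replace().

	static const uint8 kFxsaveqRdi[]  = { 0x48, 0x0f, 0xae, 0x07 };	// fxsaveq (%rdi)
	static const uint8 kXsave64Rdi[]  = { 0x48, 0x0f, 0xae, 0x27 };	// xsave64 (%rdi)
	static const uint8 kXsavec64Rdi[] = { 0x48, 0x0f, 0xc7, 0x27 };	// xsavec64 (%rdi)
	static const uint8 kXrstor64Rdi[] = { 0x48, 0x0f, 0xae, 0x2f };	// xrstor64 (%rdi)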
@@ -221,16 +221,32 @@ STATIC_FUNCTION(int_bottom):
 	// exception.
 	orq		$X86_EFLAGS_RESUME, IFRAME_flags(%rbp)
 
-	subq	$512, %rsp
-	andq	$~15, %rsp
-	fxsaveq	(%rsp)
+	// xsave needs a 64-byte alignment
+	andq	$~63, %rsp
+	movq	(gFPUSaveLength), %rcx
+	subq	%rcx, %rsp
+	leaq	(%rsp), %rdi
+	shrq	$3, %rcx
+	movq	$0, %rax
+	rep stosq
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+	movq	%rsp, %rdi
+CODEPATCH_START
+	fxsaveq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XSAVE)
 
 	// Call the interrupt handler.
 	movq	%rbp, %rdi
 	movq	IFRAME_vector(%rbp), %rax
 	call	*gInterruptHandlerTable(, %rax, 8)
 
-	fxrstorq	(%rsp)
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+	movq	%rsp, %rdi
+CODEPATCH_START
+	fxrstorq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
 	movq	%rbp, %rsp
 
 	// Restore the saved registers.
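Why the freshly reserved area is zeroed with rep stosq before the first save (a hedged note based on how XSAVE behaves, not stated in the patch): plain xsave only updates the XSTATE_BV field of the 64-byte xstate header and leaves XCOMP_BV and the reserved bytes untouched, so stale data there would make a later xrstor fault. A rough C equivalent of the prologue above, where stackPointer, save_fpu() and restore_fpu() are hypothetical stand-ins for %rsp and the CODEPATCH_START/CODEPATCH_END blocks:

	uint8* fpuState = (uint8*)(stackPointer & ~(addr_t)63) - gFPUSaveLength;
		// align down to 64 bytes as xsave requires, then reserve the area
	memset(fpuState, 0, gFPUSaveLength);	// keep the xstate header consistent
	save_fpu(fpuState, gXsaveMask);			// patched to xsave64/xsavec64 at boot
	// ... call the interrupt handler ...
	restore_fpu(fpuState, gXsaveMask);		// patched to xrstor64 at boot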
@@ -253,9 +269,22 @@ STATIC_FUNCTION(int_bottom_user):
 	// Frame pointer is the iframe.
 	movq	%rsp, %rbp
 
-	subq	$512, %rsp
-	andq	$~15, %rsp
-	fxsaveq	(%rsp)
+	// xsave needs a 64-byte alignment
+	andq	$~63, %rsp
+	movq	(gFPUSaveLength), %rcx
+	subq	%rcx, %rsp
+	leaq	(%rsp), %rdi
+	shrq	$3, %rcx
+	movq	$0, %rax
+	rep stosq
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+
+	movq	%rsp, %rdi
+CODEPATCH_START
+	fxsaveq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XSAVE)
 
 	movq	%rsp, IFRAME_fpu(%rbp)
 
 	// Set the RF (resume flag) in RFLAGS. This prevents an instruction
@@ -286,7 +315,12 @@ STATIC_FUNCTION(int_bottom_user):
 
 	UPDATE_THREAD_KERNEL_TIME()
 
-	fxrstorq	(%rsp)
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+	movq	%rsp, %rdi
+CODEPATCH_START
+	fxrstorq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
 	movq	%rbp, %rsp
 
 	// Restore the saved registers.
@@ -315,7 +349,12 @@ STATIC_FUNCTION(int_bottom_user):
 	movq	%rbp, %rdi
 	call	x86_init_user_debug_at_kernel_exit
 1:
-	fxrstorq	(%rsp)
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+	movq	%rsp, %rdi
+CODEPATCH_START
+	fxrstorq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
 	movq	%rbp, %rsp
 
 	// Restore the saved registers.
@@ -522,8 +561,13 @@ FUNCTION(x86_64_syscall_entry):
 	jmp		.Liret
 
 .Lrestore_fpu:
-	movq	IFRAME_fpu(%rbp), %rax
-	fxrstorq	(%rax)
+	movq	IFRAME_fpu(%rbp), %rdi
+
+	movl	(gXsaveMask), %eax
+	movl	(gXsaveMask+4), %edx
+CODEPATCH_START
+	fxrstorq	(%rdi)
+CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
 .Liret:
 	// Restore the saved registers.
 	RESTORE_IFRAME()
@@ -68,7 +68,10 @@ class RestartSyscall : public AbstractTraceEntry {
 extern "C" void x86_64_thread_entry();
 
 // Initial thread saved state.
-static arch_thread sInitialState;
+static arch_thread sInitialState _ALIGNED(64);
+extern uint64 gFPUSaveLength;
+extern bool gHasXsave;
+extern bool gHasXsavec;
 
 
 void
@@ -140,12 +143,36 @@ arch_thread_init(kernel_args* args)
 {
 	// Save one global valid FPU state; it will be copied in the arch dependent
 	// part of each new thread.
-	asm volatile (
-		"clts;" \
-		"fninit;" \
-		"fnclex;" \
-		"fxsave %0;"
-		: "=m" (sInitialState.fpu_state));
+	if (gHasXsave || gHasXsavec) {
+		ASSERT(gFPUSaveLength <= sizeof(sInitialState.fpu_state));
+		memset(sInitialState.fpu_state, 0, gFPUSaveLength);
+		if (gHasXsavec) {
+			asm volatile (
+				"clts;" \
+				"fninit;" \
+				"fnclex;" \
+				"movl $0x7,%%eax;" \
+				"movl $0x0,%%edx;" \
+				"xsavec64 %0"
+				:: "m" (sInitialState.fpu_state));
+		} else {
+			asm volatile (
+				"clts;" \
+				"fninit;" \
+				"fnclex;" \
+				"movl $0x7,%%eax;" \
+				"movl $0x0,%%edx;" \
+				"xsave64 %0"
+				:: "m" (sInitialState.fpu_state));
+		}
+	} else {
+		asm volatile (
+			"clts;" \
+			"fninit;" \
+			"fnclex;" \
+			"fxsaveq %0"
+			:: "m" (sInitialState.fpu_state));
+	}
 	return B_OK;
 }
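One detail worth spelling out (an illustrative note, not from the patch): the EAX=0x7/EDX=0x0 pair loaded before xsave64/xsavec64 above is the instruction's requested-feature bitmap, i.e. the same three components the kernel may later enable in XCR0:

	// 0x7 == x87 | SSE | AVX, using the constants added in this commit
	static const uint64 kInitialStateMask =
		IA32_XCR0_X87 | IA32_XCR0_SSE | IA32_XCR0_AVX;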
||||
@ -309,11 +336,10 @@ arch_setup_signal_frame(Thread* thread, struct sigaction* action,
|
||||
|
||||
if (frame->fpu != nullptr) {
|
||||
memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu,
|
||||
sizeof(signalFrameData->context.uc_mcontext.fpu));
|
||||
gFPUSaveLength);
|
||||
} else {
|
||||
memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
|
||||
sInitialState.fpu_state,
|
||||
sizeof(signalFrameData->context.uc_mcontext.fpu));
|
||||
sInitialState.fpu_state, gFPUSaveLength);
|
||||
}
|
||||
|
||||
// Fill in signalFrameData->context.uc_stack.
|
||||
@@ -385,8 +411,7 @@ arch_restore_signal_frame(struct signal_frame_data* signalFrameData)
 	Thread* thread = thread_get_current_thread();
 
 	memcpy(thread->arch_info.fpu_state,
-		(void*)&signalFrameData->context.uc_mcontext.fpu,
-		sizeof(thread->arch_info.fpu_state));
+		(void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength);
 	frame->fpu = &thread->arch_info.fpu_state;
 
 	// The syscall return code overwrites frame->ax with the return value of
|
@ -52,6 +52,7 @@ arch_altcodepatch_replace(uint16 tag, void* newcodepatch, size_t length)
|
||||
// disable write after patch
|
||||
set_area_protection(info->text_region.id, kernelProtection);
|
||||
|
||||
dprintf("arch_altcodepatch_replace found %" B_PRIu32 " altcodepatches\n", count);
|
||||
dprintf("arch_altcodepatch_replace found %" B_PRIu32 " altcodepatches "
|
||||
"for tag %u\n", count, tag);
|
||||
}
|
||||
|
||||
|
@@ -84,6 +84,13 @@ struct set_mtrrs_parameter {
 #ifdef __x86_64__
 extern addr_t _stac;
 extern addr_t _clac;
+extern addr_t _xsave;
+extern addr_t _xsavec;
+extern addr_t _xrstor;
+uint64 gXsaveMask;
+uint64 gFPUSaveLength = 512;
+bool gHasXsave = false;
+bool gHasXsavec = false;
 #endif
 
 extern "C" void x86_reboot(void);
@@ -1406,6 +1413,20 @@ enable_smep(void* dummy, int cpu)
 {
 	x86_write_cr4(x86_read_cr4() | IA32_CR4_SMEP);
 }
+
+
+static void
+enable_osxsave(void* dummy, int cpu)
+{
+	x86_write_cr4(x86_read_cr4() | IA32_CR4_OSXSAVE);
+}
+
+
+static void
+enable_xsavemask(void* dummy, int cpu)
+{
+	xsetbv(0, gXsaveMask);
+}
 #endif
 
 
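A brief note on ordering (hedged, based on how xsetbv behaves rather than on anything stated in the patch): writing XCR0 with xsetbv faults unless CR4.OSXSAVE is already set, so enable_osxsave() has to run on every CPU before enable_xsavemask() does, which is exactly the sequence used in the next hunk:

	call_all_cpus_sync(&enable_osxsave, NULL);		// set CR4.OSXSAVE everywhere
	// ... compute gXsaveMask from CPUID leaf 0xD ...
	call_all_cpus_sync(&enable_xsavemask, NULL);	// then program XCR0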
@@ -1459,6 +1480,31 @@ arch_cpu_init_post_vm(kernel_args* args)
 		} else
 			dprintf("SMAP disabled per safemode setting\n");
 	}
+
+	// if available enable XSAVE (XSAVE and extended states)
+	gHasXsave = x86_check_feature(IA32_FEATURE_EXT_XSAVE, FEATURE_EXT);
+	if (gHasXsave) {
+		gHasXsavec = x86_check_feature(IA32_FEATURE_XSAVEC,
+			FEATURE_D_1_EAX);
+
+		call_all_cpus_sync(&enable_osxsave, NULL);
+		gXsaveMask = IA32_XCR0_X87 | IA32_XCR0_SSE;
+		cpuid_info cpuid;
+		get_current_cpuid(&cpuid, 0xd, 0);
+		gXsaveMask |= (cpuid.regs.eax & IA32_XCR0_AVX);
+		call_all_cpus_sync(&enable_xsavemask, NULL);
+		get_current_cpuid(&cpuid, 0xd, 0);
+		gFPUSaveLength = cpuid.regs.ebx;
+
+		arch_altcodepatch_replace(ALTCODEPATCH_TAG_XSAVE,
+			gHasXsavec ? &_xsavec : &_xsave, 4);
+		arch_altcodepatch_replace(ALTCODEPATCH_TAG_XRSTOR,
+			&_xrstor, 4);
+
+		dprintf("enable %s 0x%" B_PRIx64 " %" B_PRId64 "\n",
+			gHasXsavec ? "XSAVEC" : "XSAVE", gXsaveMask, gFPUSaveLength);
+	}
+
 #endif
 
 	return B_OK;
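One subtlety in the hunk above (a hedged reading, not spelled out in the patch): leaf 0xD/sub-leaf 0 is queried twice because EAX reports which XCR0 bits the CPU supports, while EBX reports the save-area size for the bits currently enabled, so the length is only meaningful after the mask has been programmed. Roughly:

	cpuid_info cpuid;
	get_current_cpuid(&cpuid, 0xd, 0);
	uint64 supported = cpuid.regs.eax;	// supported XCR0 bits (low half)
	// ... enable the chosen subset in XCR0 via enable_xsavemask() ...
	get_current_cpuid(&cpuid, 0xd, 0);
	size_t saveLength = cpuid.regs.ebx;	// size for the bits just enabled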
@@ -2679,9 +2679,9 @@ thread_init(kernel_args *args)
 		panic("thread_init(): failed to init thread hash table!");
 
 	// create the thread structure object cache
-	sThreadCache = create_object_cache("threads", sizeof(Thread), 16, NULL,
+	sThreadCache = create_object_cache("threads", sizeof(Thread), 64, NULL,
 		NULL, NULL);
-		// Note: The x86 port requires 16 byte alignment of thread structures.
+		// Note: The x86 port requires 64 byte alignment of thread structures.
 	if (sThreadCache == NULL)
 		panic("thread_init(): failed to allocate thread object cache!");
 