sparc64_ipi_save_fpstate:

- use primary MMU context for consistency with other trap/interrupt handlers.

sparc64_ipi_save_fpstate, savefpstate:
- avoid storing fp registers as much as we can.

sparc64_ipi_save_fpstate, savefpstate, loadfpstate:
- remove the unaligned case, since buffers allocated with pool_cache are
  guaranteed to be 64-byte aligned.

Ok by martin@.
This commit is contained in:
nakayama 2008-07-10 15:23:58 +00:00
parent b74ec3e6b6
commit 3eac605929
4 changed files with 77 additions and 201 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: db_machdep.h,v 1.24 2008/02/29 20:27:07 martin Exp $ */
/* $NetBSD: db_machdep.h,v 1.25 2008/07/10 15:23:58 nakayama Exp $ */
/*
* Mach Operating System
@ -56,7 +56,7 @@ typedef struct {
struct frame64 db_fr;
struct trapstate db_ts[5];
int db_tl;
struct fpstate64 db_fpstate;
struct fpstate64 db_fpstate __aligned(BLOCK_SIZE);
} db_regs_t;
/* Current CPU register state */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.c,v 1.76 2008/07/10 15:04:41 nakayama Exp $ */
/* $NetBSD: cpu.c,v 1.77 2008/07/10 15:23:58 nakayama Exp $ */
/*
* Copyright (c) 1996
@ -52,7 +52,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.76 2008/07/10 15:04:41 nakayama Exp $");
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.77 2008/07/10 15:23:58 nakayama Exp $");
#include "opt_multiprocessor.h"
@ -184,7 +184,7 @@ cpu_reset_fpustate(void)
struct fpstate64 *fpstate;
struct fpstate64 fps[2];
/* This needs to be 64-bit aligned */
/* This needs to be 64-byte aligned */
fpstate = ALIGNFPSTATE(&fps[1]);
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: locore.s,v 1.282 2008/07/02 12:15:19 nakayama Exp $ */
/* $NetBSD: locore.s,v 1.283 2008/07/10 15:23:58 nakayama Exp $ */
/*
* Copyright (c) 1996-2002 Eduardo Horvath
@ -9492,120 +9492,76 @@ ENTRY(sparc64_ipi_save_fpstate)
sethi %hi(FPLWP), %g1
LDPTR [%g1 + %lo(FPLWP)], %g3
cmp %g3, %g2
bne,pn CCCR, 7f ! skip if fplwp != %g2
bne,pn CCCR, 7f ! skip if fplwp has changed
mov CTX_SECONDARY, %g5
ldxa [%g5] ASI_DMMU, %g6
membar #LoadStore
stxa %g0, [%g5] ASI_DMMU
membar #Sync
LDPTR [%g3 + L_FPSTATE], %g3
rdpr %pstate, %g2 ! enable FP before we begin
rdpr %pstate, %g2 ! enable FP before we begin
rd %fprs, %g5
wr %g0, FPRS_FEF, %fprs
or %g2, PSTATE_PEF, %g2
wrpr %g2, 0, %pstate
LDPTR [%g3 + L_FPSTATE], %g3
stx %fsr, [%g3 + FS_FSR] ! f->fs_fsr = getfsr();
rd %gsr, %g2 ! Save %gsr
st %g2, [%g3 + FS_GSR]
#if FS_REGS > 0
add %g3, FS_REGS, %g3
#endif
#ifdef DIAGNOSTIC
btst BLOCK_ALIGN, %g3 ! Needs to be re-executed
bnz,pn %icc, 3f ! Check alignment
bnz,pn %icc, 6f ! Check alignment
#endif
st %g0, [%g3 + FS_QSIZE - FS_REGS] ! f->fs_qsize = 0;
btst FPRS_DL, %g5 ! Lower FPU clean?
bz,a,pt %icc, 1f ! Then skip it
add %g3, 128, %g3 ! Skip a block
btst FPRS_DL|FPRS_DU, %g5 ! Both FPU halves clean?
bz,pt %icc, 5f ! Then skip it
mov CTX_PRIMARY, %g2
ldxa [%g2] ASI_DMMU, %g6
membar #LoadStore
stxa %g0, [%g2] ASI_DMMU ! Switch MMU to kernel primary context
membar #Sync
stda %f0, [%g3] ASI_BLK_S ! f->fs_f0 = etc;
btst FPRS_DL, %g5 ! Lower FPU clean?
bz,a,pt %icc, 1f ! Then skip it, but upper FPU not clean
add %g3, 2*BLOCK_SIZE, %g3 ! Skip a block
stda %f0, [%g3] ASI_BLK_P ! f->fs_f0 = etc;
inc BLOCK_SIZE, %g3
stda %f16, [%g3] ASI_BLK_S
inc BLOCK_SIZE, %g3
1:
stda %f16, [%g3] ASI_BLK_P
btst FPRS_DU, %g5 ! Upper FPU clean?
bz,pt %icc, 2f ! Then skip it
nop
membar #Sync
stda %f32, [%g3] ASI_BLK_S
inc BLOCK_SIZE, %g3
1:
stda %f32, [%g3] ASI_BLK_P
inc BLOCK_SIZE, %g3
stda %f48, [%g3] ASI_BLK_S
stda %f48, [%g3] ASI_BLK_P
2:
membar #Sync ! Finish operation so we can
wr %g0, FPRS_FEF, %fprs ! Mark FPU clean
mov CTX_SECONDARY, %g5
STPTR %g0, [%g1 + %lo(FPLWP)] ! fplwp = NULL
stxa %g6, [%g5] ASI_DMMU
brz,pn %g6, 5f ! Skip if context 0
nop
stxa %g6, [%g2] ASI_DMMU ! Restore primary context
membar #Sync
5:
wr %g0, FPRS_FEF, %fprs ! Mark FPU clean
STPTR %g0, [%g1 + %lo(FPLWP)] ! fplwp = NULL
7:
IPIEVC_INC(IPI_EVCNT_FPU_SYNCH,%g2,%g3)
ba,a ret_from_intr_vector
nop
3:
#ifdef DIAGONSTIC
btst 7, %g3 ! 32-bit aligned!?!?
bnz,pn %icc, 6f
#endif
btst FPRS_DL, %g5 ! Lower FPU clean?
bz,a,pt %icc, 4f ! Then skip it
add %g3, 128, %g3
membar #Sync
std %f0, [%g3 + FS_REGS + (4*0)] ! f->fs_f0 = etc;
std %f2, [%g3 + FS_REGS + (4*2)]
std %f4, [%g3 + FS_REGS + (4*4)]
std %f6, [%g3 + FS_REGS + (4*6)]
std %f8, [%g3 + FS_REGS + (4*8)]
std %f10, [%g3 + FS_REGS + (4*10)]
std %f12, [%g3 + FS_REGS + (4*12)]
std %f14, [%g3 + FS_REGS + (4*14)]
std %f16, [%g3 + FS_REGS + (4*16)]
std %f18, [%g3 + FS_REGS + (4*18)]
std %f20, [%g3 + FS_REGS + (4*20)]
std %f22, [%g3 + FS_REGS + (4*22)]
std %f24, [%g3 + FS_REGS + (4*24)]
std %f26, [%g3 + FS_REGS + (4*26)]
std %f28, [%g3 + FS_REGS + (4*28)]
std %f30, [%g3 + FS_REGS + (4*30)]
4:
btst FPRS_DU, %g5 ! Upper FPU clean?
bz,pt %icc, 2b ! Then skip it
nop
membar #Sync
std %f32, [%g3 + FS_REGS + (4*32)]
std %f34, [%g3 + FS_REGS + (4*34)]
std %f36, [%g3 + FS_REGS + (4*36)]
std %f38, [%g3 + FS_REGS + (4*38)]
std %f40, [%g3 + FS_REGS + (4*40)]
std %f42, [%g3 + FS_REGS + (4*42)]
std %f44, [%g3 + FS_REGS + (4*44)]
std %f46, [%g3 + FS_REGS + (4*46)]
std %f48, [%g3 + FS_REGS + (4*48)]
std %f50, [%g3 + FS_REGS + (4*50)]
std %f52, [%g3 + FS_REGS + (4*52)]
std %f54, [%g3 + FS_REGS + (4*54)]
std %f56, [%g3 + FS_REGS + (4*56)]
std %f58, [%g3 + FS_REGS + (4*58)]
std %f60, [%g3 + FS_REGS + (4*60)]
ba 2b
std %f62, [%g3 + FS_REGS + (4*62)]
#ifdef DIAGNOSTIC
!!
!! Damn thing is *NOT* aligned on a 64-byte boundary
!!
6:
wr %g0, FPRS_FEF, %fprs
! XXX -- we should panic instead of silently entering debugger
ta 1
nop
ba,a ret_from_intr_vector
nop
#endif
/*
* IPI handler to drop the current FPU state.
@ -9661,92 +9617,47 @@ ENTRY(savefpstate)
st %o4, [%o0 + FS_GSR]
add %o0, FS_REGS, %o2
#ifdef DIAGNOSTIC
btst BLOCK_ALIGN, %o2 ! Needs to be re-executed
bnz,pn %icc, 3f ! Check alignment
bnz,pn %icc, 6f ! Check alignment
#endif
st %g0, [%o0 + FS_QSIZE] ! f->fs_qsize = 0;
btst FPRS_DL, %o5 ! Lower FPU clean?
bz,a,pt %icc, 1f ! Then skip it
add %o2, 128, %o2 ! Skip a block
btst FPRS_DL|FPRS_DU, %o5 ! Both FPU halves clean?
bz,pt %icc, 5f ! Then skip it
btst FPRS_DL, %o5 ! Lower FPU clean?
membar #Sync
stda %f0, [%o2] ASI_BLK_COMMIT_P ! f->fs_f0 = etc;
bz,a,pt %icc, 1f ! Then skip it, but upper FPU not clean
add %o2, 2*BLOCK_SIZE, %o2 ! Skip a block
stda %f0, [%o2] ASI_BLK_P ! f->fs_f0 = etc;
inc BLOCK_SIZE, %o2
stda %f16, [%o2] ASI_BLK_COMMIT_P
inc BLOCK_SIZE, %o2
1:
stda %f16, [%o2] ASI_BLK_P
btst FPRS_DU, %o5 ! Upper FPU clean?
bz,pt %icc, 2f ! Then skip it
nop
membar #Sync
stda %f32, [%o2] ASI_BLK_COMMIT_P
inc BLOCK_SIZE, %o2
1:
stda %f32, [%o2] ASI_BLK_P
inc BLOCK_SIZE, %o2
stda %f48, [%o2] ASI_BLK_COMMIT_P
stda %f48, [%o2] ASI_BLK_P
2:
membar #Sync ! Finish operation so we can
5:
retl
wr %g0, FPRS_FEF, %fprs ! Mark FPU clean
3:
#ifdef DIAGONSTIC
btst 7, %o2 ! 32-bit aligned!?!?
bnz,pn %icc, 6f
#endif
btst FPRS_DL, %o5 ! Lower FPU clean?
bz,a,pt %icc, 4f ! Then skip it
add %o0, 128, %o0
membar #Sync
std %f0, [%o0 + FS_REGS + (4*0)] ! f->fs_f0 = etc;
std %f2, [%o0 + FS_REGS + (4*2)]
std %f4, [%o0 + FS_REGS + (4*4)]
std %f6, [%o0 + FS_REGS + (4*6)]
std %f8, [%o0 + FS_REGS + (4*8)]
std %f10, [%o0 + FS_REGS + (4*10)]
std %f12, [%o0 + FS_REGS + (4*12)]
std %f14, [%o0 + FS_REGS + (4*14)]
std %f16, [%o0 + FS_REGS + (4*16)]
std %f18, [%o0 + FS_REGS + (4*18)]
std %f20, [%o0 + FS_REGS + (4*20)]
std %f22, [%o0 + FS_REGS + (4*22)]
std %f24, [%o0 + FS_REGS + (4*24)]
std %f26, [%o0 + FS_REGS + (4*26)]
std %f28, [%o0 + FS_REGS + (4*28)]
std %f30, [%o0 + FS_REGS + (4*30)]
4:
btst FPRS_DU, %o5 ! Upper FPU clean?
bz,pt %icc, 5f ! Then skip it
nop
membar #Sync
std %f32, [%o0 + FS_REGS + (4*32)]
std %f34, [%o0 + FS_REGS + (4*34)]
std %f36, [%o0 + FS_REGS + (4*36)]
std %f38, [%o0 + FS_REGS + (4*38)]
std %f40, [%o0 + FS_REGS + (4*40)]
std %f42, [%o0 + FS_REGS + (4*42)]
std %f44, [%o0 + FS_REGS + (4*44)]
std %f46, [%o0 + FS_REGS + (4*46)]
std %f48, [%o0 + FS_REGS + (4*48)]
std %f50, [%o0 + FS_REGS + (4*50)]
std %f52, [%o0 + FS_REGS + (4*52)]
std %f54, [%o0 + FS_REGS + (4*54)]
std %f56, [%o0 + FS_REGS + (4*56)]
std %f58, [%o0 + FS_REGS + (4*58)]
std %f60, [%o0 + FS_REGS + (4*60)]
std %f62, [%o0 + FS_REGS + (4*62)]
5:
membar #Sync
retl
wr %g0, FPRS_FEF, %fprs ! Mark FPU clean
#ifdef DIAGNOSTIC
!!
!! Damn thing is *NOT* aligned on a 64-bit boundary
!! Damn thing is *NOT* aligned on a 64-byte boundary
!!
6:
wr %g0, FPRS_FEF, %fprs
! XXX -- we should panic instead of silently entering debugger
ta 1
retl
nop
#endif
/*
* Load FPU state.
@ -9762,8 +9673,10 @@ ENTRY(loadfpstate)
wrpr %o1, 0, %pstate
ldx [%o0 + FS_FSR], %fsr ! setfsr(f->fs_fsr);
add %o0, FS_REGS, %o3 ! This is zero...
#ifdef DIAGNOSTIC
btst BLOCK_ALIGN, %o3
bne,pt %icc, 1f ! Only use block loads on aligned blocks
bne,pn %icc, 1f ! Only use block loads on aligned blocks
#endif
wr %o4, %g0, %gsr
membar #Sync
ldda [%o3] ASI_BLK_P, %f0
@ -9776,55 +9689,19 @@ ENTRY(loadfpstate)
membar #Sync ! Make sure loads are complete
retl
wr %g0, FPRS_FEF, %fprs ! Clear dirty bits
1:
#ifdef DIAGNOSTIC
btst 7, %o3
bne,pn %icc, 1f
nop
#endif
/* Unaligned -- needs to be done the long way */
membar #Sync
ldd [%o3 + (4*0)], %f0
ldd [%o3 + (4*2)], %f2
ldd [%o3 + (4*4)], %f4
ldd [%o3 + (4*6)], %f6
ldd [%o3 + (4*8)], %f8
ldd [%o3 + (4*10)], %f10
ldd [%o3 + (4*12)], %f12
ldd [%o3 + (4*14)], %f14
ldd [%o3 + (4*16)], %f16
ldd [%o3 + (4*18)], %f18
ldd [%o3 + (4*20)], %f20
ldd [%o3 + (4*22)], %f22
ldd [%o3 + (4*24)], %f24
ldd [%o3 + (4*26)], %f26
ldd [%o3 + (4*28)], %f28
ldd [%o3 + (4*30)], %f30
ldd [%o3 + (4*32)], %f32
ldd [%o3 + (4*34)], %f34
ldd [%o3 + (4*36)], %f36
ldd [%o3 + (4*38)], %f38
ldd [%o3 + (4*40)], %f40
ldd [%o3 + (4*42)], %f42
ldd [%o3 + (4*44)], %f44
ldd [%o3 + (4*46)], %f46
ldd [%o3 + (4*48)], %f48
ldd [%o3 + (4*50)], %f50
ldd [%o3 + (4*52)], %f52
ldd [%o3 + (4*54)], %f54
ldd [%o3 + (4*56)], %f56
ldd [%o3 + (4*58)], %f58
ldd [%o3 + (4*60)], %f60
ldd [%o3 + (4*62)], %f62
membar #Sync
retl
wr %g0, FPRS_FEF, %fprs ! Clear dirty bits
#ifdef DIAGNOSTIC
!!
!! Damn thing is *NOT* aligned on a 64-byte boundary
!!
1:
wr %g0, FPRS_FEF, %fprs ! Clear dirty bits
! XXX -- we should panic instead of silently entering debugger
ta 1
retl
nop
#endif
/*
* ienab_bis(bis) int bis;
* ienab_bic(bic) int bic;

View File

@ -1,4 +1,4 @@
/* $NetBSD: trap.c,v 1.152 2008/07/10 15:04:42 nakayama Exp $ */
/* $NetBSD: trap.c,v 1.153 2008/07/10 15:23:58 nakayama Exp $ */
/*
* Copyright (c) 1996-2002 Eduardo Horvath. All rights reserved.
@ -50,7 +50,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.152 2008/07/10 15:04:42 nakayama Exp $");
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.153 2008/07/10 15:23:58 nakayama Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@ -173,8 +173,7 @@ int trapdebug = 0/*|TDB_SYSCALL|TDB_STOPSIG|TDB_STOPCPIO|TDB_ADDFLT|TDB_FOLLOW*/
* set, no matter how it is interpreted. Appendix N of the Sparc V8 document
* seems to imply that we should do this, and it does make sense.
*/
__asm(".align 64");
const struct fpstate64 initfpstate = {
const struct fpstate64 initfpstate __aligned(BLOCK_SIZE) = {
.fs_regs =
{ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
@ -668,7 +667,7 @@ badtrap:
struct fpstate64 *fs = l->l_md.md_fpstate;
if (fs == NULL) {
/* NOTE: fpstate must be 64-bit aligned */
/* NOTE: fpstate must be 64-byte aligned */
fs = pool_cache_get(fpstate_cache, PR_WAITOK);
*fs = initfpstate;
l->l_md.md_fpstate = fs;