From 3eac605929317c1749d94d65fee9bf6b5d81cee1 Mon Sep 17 00:00:00 2001 From: nakayama Date: Thu, 10 Jul 2008 15:23:58 +0000 Subject: [PATCH] sparc64_ipi_save_fpstate: - use primary MMU context for consistency with other trap/interrupt handlers. sparc64_ipi_save_fpstate, savefpstate: - avoid storing fp registers as we can. sparc64_ipi_save_fpstate, savefpstate, loadfpstate: - remove unaligned case since buffers allocated with pool_cache are ensured 64-byte aligned. Ok by martin@. --- sys/arch/sparc64/include/db_machdep.h | 4 +- sys/arch/sparc64/sparc64/cpu.c | 6 +- sys/arch/sparc64/sparc64/locore.s | 259 +++++++------------------- sys/arch/sparc64/sparc64/trap.c | 9 +- 4 files changed, 77 insertions(+), 201 deletions(-) diff --git a/sys/arch/sparc64/include/db_machdep.h b/sys/arch/sparc64/include/db_machdep.h index e94e1e429a09..d2ab6039ac3a 100644 --- a/sys/arch/sparc64/include/db_machdep.h +++ b/sys/arch/sparc64/include/db_machdep.h @@ -1,4 +1,4 @@ -/* $NetBSD: db_machdep.h,v 1.24 2008/02/29 20:27:07 martin Exp $ */ +/* $NetBSD: db_machdep.h,v 1.25 2008/07/10 15:23:58 nakayama Exp $ */ /* * Mach Operating System @@ -56,7 +56,7 @@ typedef struct { struct frame64 db_fr; struct trapstate db_ts[5]; int db_tl; - struct fpstate64 db_fpstate; + struct fpstate64 db_fpstate __aligned(BLOCK_SIZE); } db_regs_t; /* Current CPU register state */ diff --git a/sys/arch/sparc64/sparc64/cpu.c b/sys/arch/sparc64/sparc64/cpu.c index 48ec1d2cedda..e122a24900e0 100644 --- a/sys/arch/sparc64/sparc64/cpu.c +++ b/sys/arch/sparc64/sparc64/cpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.76 2008/07/10 15:04:41 nakayama Exp $ */ +/* $NetBSD: cpu.c,v 1.77 2008/07/10 15:23:58 nakayama Exp $ */ /* * Copyright (c) 1996 @@ -52,7 +52,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.76 2008/07/10 15:04:41 nakayama Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.77 2008/07/10 15:23:58 nakayama Exp $"); #include "opt_multiprocessor.h" @@ -184,7 +184,7 @@ cpu_reset_fpustate(void) struct fpstate64 *fpstate; struct fpstate64 fps[2]; - /* This needs to be 64-bit aligned */ + /* This needs to be 64-byte aligned */ fpstate = ALIGNFPSTATE(&fps[1]); /* diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s index c3fb2849a2e9..9a03bfa2b346 100644 --- a/sys/arch/sparc64/sparc64/locore.s +++ b/sys/arch/sparc64/sparc64/locore.s @@ -1,4 +1,4 @@ -/* $NetBSD: locore.s,v 1.282 2008/07/02 12:15:19 nakayama Exp $ */ +/* $NetBSD: locore.s,v 1.283 2008/07/10 15:23:58 nakayama Exp $ */ /* * Copyright (c) 1996-2002 Eduardo Horvath @@ -9492,120 +9492,76 @@ ENTRY(sparc64_ipi_save_fpstate) sethi %hi(FPLWP), %g1 LDPTR [%g1 + %lo(FPLWP)], %g3 cmp %g3, %g2 - bne,pn CCCR, 7f ! skip if fplwp != %g2 + bne,pn CCCR, 7f ! skip if fplwp has changed - mov CTX_SECONDARY, %g5 - ldxa [%g5] ASI_DMMU, %g6 - membar #LoadStore - stxa %g0, [%g5] ASI_DMMU - membar #Sync - - LDPTR [%g3 + L_FPSTATE], %g3 - - rdpr %pstate, %g2 ! enable FP before we begin + rdpr %pstate, %g2 ! enable FP before we begin rd %fprs, %g5 wr %g0, FPRS_FEF, %fprs or %g2, PSTATE_PEF, %g2 wrpr %g2, 0, %pstate + LDPTR [%g3 + L_FPSTATE], %g3 stx %fsr, [%g3 + FS_FSR] ! f->fs_fsr = getfsr(); - rd %gsr, %g2 ! Save %gsr st %g2, [%g3 + FS_GSR] - +#if FS_REGS > 0 add %g3, FS_REGS, %g3 +#endif +#ifdef DIAGNOSTIC btst BLOCK_ALIGN, %g3 ! Needs to be re-executed - bnz,pn %icc, 3f ! Check alignment + bnz,pn %icc, 6f ! Check alignment +#endif st %g0, [%g3 + FS_QSIZE - FS_REGS] ! f->fs_qsize = 0; - btst FPRS_DL, %g5 ! Lower FPU clean? - bz,a,pt %icc, 1f ! Then skip it - add %g3, 128, %g3 ! Skip a block + btst FPRS_DL|FPRS_DU, %g5 ! Both FPU halves clean? + bz,pt %icc, 5f ! Then skip it + + mov CTX_PRIMARY, %g2 + ldxa [%g2] ASI_DMMU, %g6 + membar #LoadStore + stxa %g0, [%g2] ASI_DMMU ! Switch MMU to kernel primary context membar #Sync - stda %f0, [%g3] ASI_BLK_S ! f->fs_f0 = etc; + + btst FPRS_DL, %g5 ! Lower FPU clean? + bz,a,pt %icc, 1f ! Then skip it, but upper FPU not clean + add %g3, 2*BLOCK_SIZE, %g3 ! Skip a block + + stda %f0, [%g3] ASI_BLK_P ! f->fs_f0 = etc; inc BLOCK_SIZE, %g3 - stda %f16, [%g3] ASI_BLK_S - inc BLOCK_SIZE, %g3 -1: + stda %f16, [%g3] ASI_BLK_P + btst FPRS_DU, %g5 ! Upper FPU clean? bz,pt %icc, 2f ! Then skip it - nop - - membar #Sync - stda %f32, [%g3] ASI_BLK_S + inc BLOCK_SIZE, %g3 +1: + stda %f32, [%g3] ASI_BLK_P inc BLOCK_SIZE, %g3 - stda %f48, [%g3] ASI_BLK_S + stda %f48, [%g3] ASI_BLK_P 2: membar #Sync ! Finish operation so we can - wr %g0, FPRS_FEF, %fprs ! Mark FPU clean - - mov CTX_SECONDARY, %g5 - STPTR %g0, [%g1 + %lo(FPLWP)] ! fplwp = NULL - stxa %g6, [%g5] ASI_DMMU + brz,pn %g6, 5f ! Skip if context 0 + nop + stxa %g6, [%g2] ASI_DMMU ! Restore primary context membar #Sync +5: + wr %g0, FPRS_FEF, %fprs ! Mark FPU clean + STPTR %g0, [%g1 + %lo(FPLWP)] ! fplwp = NULL 7: IPIEVC_INC(IPI_EVCNT_FPU_SYNCH,%g2,%g3) ba,a ret_from_intr_vector nop -3: -#ifdef DIAGONSTIC - btst 7, %g3 ! 32-bit aligned!?!? - bnz,pn %icc, 6f -#endif - btst FPRS_DL, %g5 ! Lower FPU clean? - bz,a,pt %icc, 4f ! Then skip it - add %g3, 128, %g3 - - membar #Sync - std %f0, [%g3 + FS_REGS + (4*0)] ! f->fs_f0 = etc; - std %f2, [%g3 + FS_REGS + (4*2)] - std %f4, [%g3 + FS_REGS + (4*4)] - std %f6, [%g3 + FS_REGS + (4*6)] - std %f8, [%g3 + FS_REGS + (4*8)] - std %f10, [%g3 + FS_REGS + (4*10)] - std %f12, [%g3 + FS_REGS + (4*12)] - std %f14, [%g3 + FS_REGS + (4*14)] - std %f16, [%g3 + FS_REGS + (4*16)] - std %f18, [%g3 + FS_REGS + (4*18)] - std %f20, [%g3 + FS_REGS + (4*20)] - std %f22, [%g3 + FS_REGS + (4*22)] - std %f24, [%g3 + FS_REGS + (4*24)] - std %f26, [%g3 + FS_REGS + (4*26)] - std %f28, [%g3 + FS_REGS + (4*28)] - std %f30, [%g3 + FS_REGS + (4*30)] -4: - btst FPRS_DU, %g5 ! Upper FPU clean? - bz,pt %icc, 2b ! Then skip it - nop - - membar #Sync - std %f32, [%g3 + FS_REGS + (4*32)] - std %f34, [%g3 + FS_REGS + (4*34)] - std %f36, [%g3 + FS_REGS + (4*36)] - std %f38, [%g3 + FS_REGS + (4*38)] - std %f40, [%g3 + FS_REGS + (4*40)] - std %f42, [%g3 + FS_REGS + (4*42)] - std %f44, [%g3 + FS_REGS + (4*44)] - std %f46, [%g3 + FS_REGS + (4*46)] - std %f48, [%g3 + FS_REGS + (4*48)] - std %f50, [%g3 + FS_REGS + (4*50)] - std %f52, [%g3 + FS_REGS + (4*52)] - std %f54, [%g3 + FS_REGS + (4*54)] - std %f56, [%g3 + FS_REGS + (4*56)] - std %f58, [%g3 + FS_REGS + (4*58)] - std %f60, [%g3 + FS_REGS + (4*60)] - ba 2b - std %f62, [%g3 + FS_REGS + (4*62)] - +#ifdef DIAGNOSTIC !! !! Damn thing is *NOT* aligned on a 64-byte boundary !! 6: wr %g0, FPRS_FEF, %fprs + ! XXX -- we should panic instead of silently entering debugger ta 1 nop ba,a ret_from_intr_vector nop +#endif /* * IPI handler to drop the current FPU state. @@ -9661,92 +9617,47 @@ ENTRY(savefpstate) st %o4, [%o0 + FS_GSR] add %o0, FS_REGS, %o2 +#ifdef DIAGNOSTIC btst BLOCK_ALIGN, %o2 ! Needs to be re-executed - bnz,pn %icc, 3f ! Check alignment + bnz,pn %icc, 6f ! Check alignment +#endif st %g0, [%o0 + FS_QSIZE] ! f->fs_qsize = 0; - btst FPRS_DL, %o5 ! Lower FPU clean? - bz,a,pt %icc, 1f ! Then skip it - add %o2, 128, %o2 ! Skip a block + btst FPRS_DL|FPRS_DU, %o5 ! Both FPU halves clean? + bz,pt %icc, 5f ! Then skip it + btst FPRS_DL, %o5 ! Lower FPU clean? membar #Sync - stda %f0, [%o2] ASI_BLK_COMMIT_P ! f->fs_f0 = etc; + bz,a,pt %icc, 1f ! Then skip it, but upper FPU not clean + add %o2, 2*BLOCK_SIZE, %o2 ! Skip a block + + stda %f0, [%o2] ASI_BLK_P ! f->fs_f0 = etc; inc BLOCK_SIZE, %o2 - stda %f16, [%o2] ASI_BLK_COMMIT_P - inc BLOCK_SIZE, %o2 -1: + stda %f16, [%o2] ASI_BLK_P + btst FPRS_DU, %o5 ! Upper FPU clean? bz,pt %icc, 2f ! Then skip it - nop - - membar #Sync - stda %f32, [%o2] ASI_BLK_COMMIT_P + inc BLOCK_SIZE, %o2 +1: + stda %f32, [%o2] ASI_BLK_P inc BLOCK_SIZE, %o2 - stda %f48, [%o2] ASI_BLK_COMMIT_P + stda %f48, [%o2] ASI_BLK_P 2: membar #Sync ! Finish operation so we can +5: retl wr %g0, FPRS_FEF, %fprs ! Mark FPU clean -3: -#ifdef DIAGONSTIC - btst 7, %o2 ! 32-bit aligned!?!? - bnz,pn %icc, 6f -#endif - btst FPRS_DL, %o5 ! Lower FPU clean? - bz,a,pt %icc, 4f ! Then skip it - add %o0, 128, %o0 - - membar #Sync - std %f0, [%o0 + FS_REGS + (4*0)] ! f->fs_f0 = etc; - std %f2, [%o0 + FS_REGS + (4*2)] - std %f4, [%o0 + FS_REGS + (4*4)] - std %f6, [%o0 + FS_REGS + (4*6)] - std %f8, [%o0 + FS_REGS + (4*8)] - std %f10, [%o0 + FS_REGS + (4*10)] - std %f12, [%o0 + FS_REGS + (4*12)] - std %f14, [%o0 + FS_REGS + (4*14)] - std %f16, [%o0 + FS_REGS + (4*16)] - std %f18, [%o0 + FS_REGS + (4*18)] - std %f20, [%o0 + FS_REGS + (4*20)] - std %f22, [%o0 + FS_REGS + (4*22)] - std %f24, [%o0 + FS_REGS + (4*24)] - std %f26, [%o0 + FS_REGS + (4*26)] - std %f28, [%o0 + FS_REGS + (4*28)] - std %f30, [%o0 + FS_REGS + (4*30)] -4: - btst FPRS_DU, %o5 ! Upper FPU clean? - bz,pt %icc, 5f ! Then skip it - nop - - membar #Sync - std %f32, [%o0 + FS_REGS + (4*32)] - std %f34, [%o0 + FS_REGS + (4*34)] - std %f36, [%o0 + FS_REGS + (4*36)] - std %f38, [%o0 + FS_REGS + (4*38)] - std %f40, [%o0 + FS_REGS + (4*40)] - std %f42, [%o0 + FS_REGS + (4*42)] - std %f44, [%o0 + FS_REGS + (4*44)] - std %f46, [%o0 + FS_REGS + (4*46)] - std %f48, [%o0 + FS_REGS + (4*48)] - std %f50, [%o0 + FS_REGS + (4*50)] - std %f52, [%o0 + FS_REGS + (4*52)] - std %f54, [%o0 + FS_REGS + (4*54)] - std %f56, [%o0 + FS_REGS + (4*56)] - std %f58, [%o0 + FS_REGS + (4*58)] - std %f60, [%o0 + FS_REGS + (4*60)] - std %f62, [%o0 + FS_REGS + (4*62)] -5: - membar #Sync - retl - wr %g0, FPRS_FEF, %fprs ! Mark FPU clean +#ifdef DIAGNOSTIC !! - !! Damn thing is *NOT* aligned on a 64-bit boundary + !! Damn thing is *NOT* aligned on a 64-byte boundary !! 6: wr %g0, FPRS_FEF, %fprs + ! XXX -- we should panic instead of silently entering debugger ta 1 retl nop +#endif /* * Load FPU state. @@ -9762,8 +9673,10 @@ ENTRY(loadfpstate) wrpr %o1, 0, %pstate ldx [%o0 + FS_FSR], %fsr ! setfsr(f->fs_fsr); add %o0, FS_REGS, %o3 ! This is zero... +#ifdef DIAGNOSTIC btst BLOCK_ALIGN, %o3 - bne,pt %icc, 1f ! Only use block loads on aligned blocks + bne,pn %icc, 1f ! Only use block loads on aligned blocks +#endif wr %o4, %g0, %gsr membar #Sync ldda [%o3] ASI_BLK_P, %f0 @@ -9776,55 +9689,19 @@ ENTRY(loadfpstate) membar #Sync ! Make sure loads are complete retl wr %g0, FPRS_FEF, %fprs ! Clear dirty bits -1: -#ifdef DIAGNOSTIC - btst 7, %o3 - bne,pn %icc, 1f - nop -#endif - /* Unaligned -- needs to be done the long way */ - membar #Sync - ldd [%o3 + (4*0)], %f0 - ldd [%o3 + (4*2)], %f2 - ldd [%o3 + (4*4)], %f4 - ldd [%o3 + (4*6)], %f6 - ldd [%o3 + (4*8)], %f8 - ldd [%o3 + (4*10)], %f10 - ldd [%o3 + (4*12)], %f12 - ldd [%o3 + (4*14)], %f14 - ldd [%o3 + (4*16)], %f16 - ldd [%o3 + (4*18)], %f18 - ldd [%o3 + (4*20)], %f20 - ldd [%o3 + (4*22)], %f22 - ldd [%o3 + (4*24)], %f24 - ldd [%o3 + (4*26)], %f26 - ldd [%o3 + (4*28)], %f28 - ldd [%o3 + (4*30)], %f30 - ldd [%o3 + (4*32)], %f32 - ldd [%o3 + (4*34)], %f34 - ldd [%o3 + (4*36)], %f36 - ldd [%o3 + (4*38)], %f38 - ldd [%o3 + (4*40)], %f40 - ldd [%o3 + (4*42)], %f42 - ldd [%o3 + (4*44)], %f44 - ldd [%o3 + (4*46)], %f46 - ldd [%o3 + (4*48)], %f48 - ldd [%o3 + (4*50)], %f50 - ldd [%o3 + (4*52)], %f52 - ldd [%o3 + (4*54)], %f54 - ldd [%o3 + (4*56)], %f56 - ldd [%o3 + (4*58)], %f58 - ldd [%o3 + (4*60)], %f60 - ldd [%o3 + (4*62)], %f62 - membar #Sync - retl - wr %g0, FPRS_FEF, %fprs ! Clear dirty bits +#ifdef DIAGNOSTIC + !! + !! Damn thing is *NOT* aligned on a 64-byte boundary + !! 1: wr %g0, FPRS_FEF, %fprs ! Clear dirty bits + ! XXX -- we should panic instead of silently entering debugger ta 1 retl nop +#endif + /* * ienab_bis(bis) int bis; * ienab_bic(bic) int bic; diff --git a/sys/arch/sparc64/sparc64/trap.c b/sys/arch/sparc64/sparc64/trap.c index ac84ab16ba27..b6bf9d034e2b 100644 --- a/sys/arch/sparc64/sparc64/trap.c +++ b/sys/arch/sparc64/sparc64/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.152 2008/07/10 15:04:42 nakayama Exp $ */ +/* $NetBSD: trap.c,v 1.153 2008/07/10 15:23:58 nakayama Exp $ */ /* * Copyright (c) 1996-2002 Eduardo Horvath. All rights reserved. @@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.152 2008/07/10 15:04:42 nakayama Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.153 2008/07/10 15:23:58 nakayama Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -173,8 +173,7 @@ int trapdebug = 0/*|TDB_SYSCALL|TDB_STOPSIG|TDB_STOPCPIO|TDB_ADDFLT|TDB_FOLLOW*/ * set, no matter how it is interpreted. Appendix N of the Sparc V8 document * seems to imply that we should do this, and it does make sense. */ -__asm(".align 64"); -const struct fpstate64 initfpstate = { +const struct fpstate64 initfpstate __aligned(BLOCK_SIZE) = { .fs_regs = { ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, @@ -668,7 +667,7 @@ badtrap: struct fpstate64 *fs = l->l_md.md_fpstate; if (fs == NULL) { - /* NOTE: fpstate must be 64-bit aligned */ + /* NOTE: fpstate must be 64-byte aligned */ fs = pool_cache_get(fpstate_cache, PR_WAITOK); *fs = initfpstate; l->l_md.md_fpstate = fs;