diff --git a/sys/arch/sparc64/sparc64/locore.s b/sys/arch/sparc64/sparc64/locore.s index d6627d975cc4..b39a88641c89 100644 --- a/sys/arch/sparc64/sparc64/locore.s +++ b/sys/arch/sparc64/sparc64/locore.s @@ -1,4 +1,4 @@ -/* $NetBSD: locore.s,v 1.119 2001/06/30 00:08:15 eeh Exp $ */ +/* $NetBSD: locore.s,v 1.120 2001/06/30 19:09:38 eeh Exp $ */ /* * Copyright (c) 1996-2001 Eduardo Horvath @@ -304,6 +304,69 @@ #define STACKFRAME(size) TO_STACK32(size) #endif +/* + * The following routines allow fpu use in the kernel. + * + * ENABLE_FPU allocates a stack frame holding an aligned fpstate and leaves %l0 = new fpstate, %l1 = %hi(FPPROC), %l2 = orig fpproc, %l3 = orig fpstate or 0, %l5 = proc in use, %l6 = its old fpstate (%l4 is scratch). RESTORE_FPU needs these intact. Extra + * local storage can be requested by setting the siz parameter, + * and can be accessed at %sp+CC64FSZ. + */ +#define ENABLE_FPU(siz) \ + save %sp, -(CC64FSZ), %sp; /* Allocate a stack frame */ \ + sethi %hi(FPPROC), %l1; \ + add %fp, STKB-FS_SIZE, %l0; /* Allocate a fpstate */ \ + LDPTR [%l1 + %lo(FPPROC)], %l2; /* Load fpproc */ \ + andn %l0, BLOCK_ALIGN, %l0; clr %l3; /* Align it (mask, not size), no orig fpstate yet */ \ + brz,pt %l2, 1f; /* fpproc == NULL? */ \ + add %l0, -STKB-CC64FSZ-(siz), %sp; /* Set proper %sp */ \ + LDPTR [%l2 + P_FPSTATE], %l3; \ + brz,pn %l3, 1f; /* Make sure we have an fpstate */ \ + mov %l3, %o0; \ + call _C_LABEL(savefpstate); /* Save the old fpstate */ \ +1: set EINTSTACK-STKB, %l4; /* All paths land here: Are we on intr stack? 
*/ \ + cmp %sp, %l4; \ + bgu,pt %xcc, 1f; \ + set INTSTACK-STKB, %l4; \ + cmp %sp, %l4; \ + blu %xcc, 1f; \ +0: \ + sethi %hi(_C_LABEL(proc0)), %l4; /* Yes, use proc0 */ \ + ba,pt %xcc, 2f; /* XXXX needs to change to CPUs idle proc */ \ + or %l4, %lo(_C_LABEL(proc0)), %l5; \ +1: \ + sethi %hi(CURPROC), %l4; /* Use curproc */ \ + LDPTR [%l4 + %lo(CURPROC)], %l5; \ + brz,pn %l5, 0b; nop; /* If curproc is NULL need to use proc0 (nop keeps the load below out of the delay slot) */ \ +2: \ + LDPTR [%l5 + P_FPSTATE], %l6; /* Save old fpstate */ \ + STPTR %l0, [%l5 + P_FPSTATE]; /* Insert new fpstate */ \ + STPTR %l5, [%l1 + %lo(FPPROC)]; /* Set new fpproc */ \ + wr %g0, FPRS_FEF, %fprs /* Enable FPU */ + +/* + * We have saved our possible fpstate, now put back the original fpproc and fpstate (reloading the latter if there was one) + * and continue with life. NOTE(review): nothing in this macro itself writes %fprs, so it does not disable the fpu -- confirm that is intended. + */ +#ifdef DEBUG +#define __CHECK_FPU \ + LDPTR [%l5 + P_FPSTATE], %l7; \ + cmp %l7, %l0; \ + tnz 1; +#else +#define __CHECK_FPU +#endif + +#define RESTORE_FPU \ + __CHECK_FPU \ + tst %l3; /* Is there an orig fpstate to reload? */ \ + STPTR %l2, [%l1 + %lo(FPPROC)]; /* Restore old fpproc */ \ + bz,pt %xcc, 1f; /* Skip if no fpstate */ \ + STPTR %l6, [%l5 + P_FPSTATE]; /* Restore old fpstate */ \ + \ + call _C_LABEL(loadfpstate); /* Re-load orig fpstate */ \ + mov %l3, %o0; \ +1: + .data .globl _C_LABEL(data_start) @@ -9347,10 +9410,6 @@ ENTRY(bcopy) /* src, dest, size */ _ALIGN .text 3: -#endif -#if 1 - cmp %o2, 256 - bge Lbcopy_block #endif cmp %o2, BCOPY_SMALL Lbcopy_start: @@ -9386,10 +9445,14 @@ Lbcopy_fancy: btst 7, %o1 be,a Lbcopy_doubles dec 8, %o2 ! if all lined up, len -= 8, goto bcopy_doubes - - ! If the low bits match, we can make these line up. 1: - xor %o0, %o1, %o3 ! t = src ^ dst; +#if 0 + ! If it is big enough, use VIS instructions + cmp %o2, 256 + bge Lbcopy_block +#endif + ! If the low bits match, we can make these line up. + xor %o0, %o1, %o3 ! t = src ^ dst; btst 1, %o3 ! if (t & 1) { be 1f btst 1, %o0 ! [delay slot: if (src & 1)] @@ -9524,13 +9587,11 @@ Lbcopy_done: Lbcopy_block: !! 
Make sure our trap table is installed -# ba,a,pt %icc, Lbcopy_start rdpr %tba, %o3 set _C_LABEL(trapbase), %o5 sub %o3, %o5, %o3 brnz,pn %o3, Lbcopy_start ! No, then don't use block load/store nop -#define _KERNEL #ifdef _KERNEL /* * Kernel: @@ -9567,7 +9628,8 @@ Lbcopy_block: * * %l0 XXXX DEBUG old fpstate * %l1 fpproc (hi bits only) - * %l2 old fpproc + * %l2 orig fpproc + * %l3 orig fpstate * %l5 curproc * %l6 old fpstate * @@ -9588,6 +9650,9 @@ Lbcopy_block: !! This code will allow us to save the fpstate around this !! routine and nest FP use in the kernel !! +#if 1 + ENABLE_FPU(0) +#else save %sp, -(CC64FSZ+FS_SIZE+BLOCK_SIZE), %sp ! Allocate an fpstate sethi %hi(FPPROC), %l1 LDPTR [%l1 + %lo(FPPROC)], %l2 ! Load fpproc @@ -9617,7 +9682,7 @@ Lbcopy_block: STPTR %l0, [%l5 + P_FPSTATE] ! Insert new fpstate STPTR %l5, [%l1 + %lo(FPPROC)] ! Set new fpproc wr %g0, FPRS_FEF, %fprs ! Enable FPU - +#endif mov %i0, %o0 ! Src addr. mov %i1, %o1 ! Store our dest ptr here. mov %i2, %o2 ! Len counter @@ -9628,7 +9693,7 @@ Lbcopy_block: mov %i2, %o2 ! Len counter #endif - + !! !! First align the output to a 64-bit entity !! @@ -9679,7 +9744,7 @@ Lbcopy_block: stda %f4, [%o1] ASI_FL16_P ! Store 1st short dec 2, %o2 inc 2, %o1 - inc 2, %o0 ! XXXX + inc 2, %o0 4: brz,pn %o2, Lbcopy_blockfinish ! XXXX @@ -9706,7 +9771,7 @@ Lbcopy_block: st %f5, [%o1] ! Store word dec 4, %o2 inc 4, %o1 - inc 4, %o0 ! XXXX + inc 4, %o0 4: brz,pn %o2, Lbcopy_blockfinish ! XXXX !! @@ -9716,7 +9781,7 @@ Lbcopy_block_common: mov -0, %o4 alignaddr %o0, %o4, %o4 ! base - shift - + brz,pt %g1, 1f ! Data loaded? cmp %o3, %o4 ! Addresses same? beq,pt %xcc, 3f @@ -9793,12 +9858,14 @@ Lbcopy_block_aligned64: * store. 
* */ -#if 0 +#if 1 /* XXXX DEBUG -- return which routine we used instead of *src */ and %o0, BLOCK_ALIGN, %o3 - set Lbcopy_blocknames, %g7 + set Lbcopy_blocknames, %g1 + ldx [%g1 + %o3], %g1 + set block_routine, %o3 ba 1f - ldx [%g7 + %o3], %g7 + stx %g1, [%o3] #define BL_NAME(x) x: .asciz #x .align 8 @@ -9811,14 +9878,14 @@ Lbcopy_blocknames: .xword 105f .xword 106f .xword 107f - BL_NAME(100) - BL_NAME(101) - BL_NAME(102) - BL_NAME(103) - BL_NAME(104) - BL_NAME(105) - BL_NAME(106) - BL_NAME(107) +100: .asciz "L100" +101: .asciz "L101" +102: .asciz "L102" +103: .asciz "L103" +104: .asciz "L104" +105: .asciz "L105" +106: .asciz "L106" +107: .asciz "L107" .align 8 1: #endif @@ -9898,14 +9965,14 @@ Lbcopy_block_jmp: L100: fmovd %f0 , %f62 ldda [%o0] ASI_BLK_P, %f0 - inc BLOCK_SIZE, %o0 + inc BLOCK_SIZE, %o0 cmp %o0, %g2 bgu,a,pn %icc, 3f membar #Sync ldda [%o0] ASI_BLK_P, %f16 3: faligndata %f62, %f0, %f32 - inc BLOCK_SIZE, %o0 + inc BLOCK_SIZE, %o0 faligndata %f0, %f2, %f34 dec BLOCK_SIZE, %o2 faligndata %f2, %f4, %f36 @@ -9962,7 +10029,7 @@ L100: bgu,a,pn %icc, 2f membar #Sync - ldda [%o0] ASI_BLK_P, %f16 + ldda [%o0] ASI_BLK_P, %f16 ! Increment is at top 2: stda %f32, [%o1] ASI_STORE ba 3b @@ -10206,7 +10273,6 @@ L103: faligndata %f50, %f52, %f46 stda %f32, [%o1] ASI_STORE - inc BLOCK_SIZE, %o1 faligndata %f52, %f54, %f32 dec BLOCK_SIZE, %o2 @@ -10214,6 +10280,7 @@ L103: cmp %o0, %g2 faligndata %f56, %f58, %f36 faligndata %f58, %f60, %f38 + inc BLOCK_SIZE, %o1 faligndata %f60, %f62, %f40 bgu,a,pn %icc, 2f membar #Sync @@ -10570,6 +10637,7 @@ Lbcopy_blockdone: FINISH_REG(%f42) FINISH_REG(%f44) FINISH_REG(%f46) +#undef FINISH_REG !! !! The low 3 bits have the sub-word bits needed to be !! stored [because (x-8)&0x7 == x]. @@ -10616,6 +10684,9 @@ Lbcopy_blockfinish: * Weve saved our possible fpstate, now disable the fpu * and continue with life. 
*/ +#if 1 + RESTORE_FPU +#else #ifdef DEBUG LDPTR [%l1 + %lo(FPPROC)], %l7 cmp %l7, %l5 @@ -10624,9 +10695,15 @@ Lbcopy_blockfinish: cmp %l7, %l0 tnz 1 ! fpstate has changed! #endif - STPTR %g0, [%l1 + %lo(FPPROC)] ! Clear fpproc - STPTR %l6, [%l5 + P_FPSTATE] ! Restore old fpstate - wr %g0, 0, %fprs ! Disable FPU + andcc %l2, %l3, %g0 ! If (fpproc && fpstate) + STPTR %l2, [%l1 + %lo(FPPROC)] ! Restore old fproc + bz,pt %xcc, 1f ! Skip if no fpstate + STPTR %l6, [%l5 + P_FPSTATE] ! Restore old fpstate + + call _C_LABEL(loadfpstate) ! Re-load orig fpstate + mov %l3, %o0 +1: +#endif ret restore %g7, 0, %o0 ! Return DEST for memcpy #endif