Rewrite RUN_P2, RUN_P1 in asm. gcc4 is too happy to optimize away
assigned-goto versions if try to do anything fancy with them (discovered by trying to wrap RUN_P2 into an "if").
This commit is contained in:
parent
4e674399aa
commit
04e0ee32b2
|
@ -1,4 +1,4 @@
|
||||||
/* $NetBSD: cpu.h,v 1.51 2008/02/27 18:26:16 xtraeme Exp $ */
|
/* $NetBSD: cpu.h,v 1.52 2008/03/16 19:14:08 uwe Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
* Copyright (c) 2002 The NetBSD Foundation, Inc. All rights reserved.
|
* Copyright (c) 2002 The NetBSD Foundation, Inc. All rights reserved.
|
||||||
|
@ -165,26 +165,65 @@ do { \
|
||||||
|
|
||||||
#ifndef __lint__
|
#ifndef __lint__
|
||||||
|
|
||||||
/*
 * Switch from P1 (cached) to P2 (uncached).  This used to be written
 * using gcc's assigned goto extension, but gcc4 aggressive optimizations
 * tend to optimize that away under certain circumstances.
 *
 * The jump target is computed at run time: "mova" loads the address of
 * the local label 2f (PC-relative; on SH, mova can only write r0 --
 * hence the explicit register binding below), and OR'ing in 0x20000000
 * converts that P1 address into its P2 alias, the same transformation
 * the old SH3_P1SEG_TO_P2SEG() version performed.
 */
#define RUN_P2 \
do { \
	/* mova targets r0 only, so pin this asm output to r0 */ \
	register uint32_t r0 asm("r0"); \
	uint32_t pc; \
	__asm volatile( \
		" mov.l 1f, %1 ;" /* %1 = P2 address bit (0x20000000) */ \
		" mova 2f, %0 ;"  /* %0 = P1 address of label 2 */ \
		" or %0, %1 ;"    /* %1 = P2 alias of label 2 */ \
		" jmp @%1 ;"      /* continue at 2: running in P2 */ \
		" nop ;"          /* branch delay slot */ \
		" .align 2 ;" \
		"1: .long 0x20000000;" \
		"2:;" \
		: "=r"(r0), "=r"(pc)); \
} while (0)
|
||||||
|
|
||||||
/*
 * Switch from P2 (uncached) back to P1 (cached).  We need to be
 * running on P2 to access cache control, memory-mapped cache and TLB
 * arrays, etc. and after touching them at least 8 instructions are
 * necessary before jumping to P1, so provide that padding here.
 *
 * AND'ing with ~0x20000000 clears the P2 bit, converting the P2 address
 * of label 2f back into its P1 alias (the transformation the old
 * SH3_P2SEG_TO_P1SEG() version performed).  The numbered comments count
 * the 8 padding instructions executed before the jump to P1.
 */
#define RUN_P1 \
do { \
	/* mova targets r0 only, so pin this asm output to r0 */ \
	register uint32_t r0 asm("r0"); \
	uint32_t pc; \
	__asm volatile( \
	/*1*/ " mov.l 1f, %1 ;" /* %1 = ~0x20000000 (P2-bit mask) */ \
	/*2*/ " mova 2f, %0 ;"  /* %0 = address of label 2 */ \
	/*3*/ " nop ;" \
	/*4*/ " and %0, %1 ;"   /* %1 = P1 alias of label 2 */ \
	/*5*/ " nop ;" \
	/*6*/ " nop ;" \
	/*7*/ " nop ;" \
	/*8*/ " nop ;" \
		" jmp @%1 ;"    /* continue at 2: running in P1 */ \
		" nop ;"        /* branch delay slot */ \
		" .align 2 ;" \
		"1: .long ~0x20000000;" \
		"2:;" \
		: "=r"(r0), "=r"(pc)); \
} while (0)
|
||||||
|
|
||||||
|
/*
 * If RUN_P1 is the last thing we do in a function we can omit it, because
 * we are going to return to a P1 caller anyway, but we still need to
 * ensure there's at least 8 instructions before the jump to P1 (the
 * return itself provides the jump).
 */
#define PAD_P1_SWITCH __asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop;")
|
||||||
|
|
||||||
#else /* __lint__ */

/* lint(1) cannot digest the inline assembler above; stub the macros out. */
#define RUN_P2 do {} while (/* CONSTCOND */ 0)
#define RUN_P1 do {} while (/* CONSTCOND */ 0)
#define PAD_P1_SWITCH do {} while (/* CONSTCOND */ 0)

#endif
|
||||||
|
|
||||||
#if defined(SH4)
|
#if defined(SH4)
|
||||||
|
|
Loading…
Reference in New Issue