diff --git a/sys/arch/sh3/include/cpu.h b/sys/arch/sh3/include/cpu.h index 2806afbb9c5b..94a775ee81ee 100644 --- a/sys/arch/sh3/include/cpu.h +++ b/sys/arch/sh3/include/cpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.51 2008/02/27 18:26:16 xtraeme Exp $ */ +/* $NetBSD: cpu.h,v 1.52 2008/03/16 19:14:08 uwe Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. All rights reserved. @@ -165,26 +165,65 @@ do { \ #ifndef __lint__ -/* switch from P1 to P2 */ -#define RUN_P2 do { \ - void *p; \ - p = &&P2; \ - goto *(void *)SH3_P1SEG_TO_P2SEG(p); \ - P2: (void)0; \ +/* + * Switch from P1 (cached) to P2 (uncached). This used to be written + * using gcc's assigned goto extension, but gcc4 aggressive optimizations + * tend to optimize that away under certain circumstances. + */ +#define RUN_P2 \ + do { \ + register uint32_t r0 asm("r0"); \ + uint32_t pc; \ + __asm volatile( \ + " mov.l 1f, %1 ;" \ + " mova 2f, %0 ;" \ + " or %0, %1 ;" \ + " jmp @%1 ;" \ + " nop ;" \ + " .align 2 ;" \ + "1: .long 0x20000000;" \ + "2:;" \ + : "=r"(r0), "=r"(pc)); \ } while (0) -/* switch from P2 to P1 */ -#define RUN_P1 do { \ - void *p; \ - p = &&P1; \ - __asm volatile("nop;nop;nop;nop;nop;nop;nop;nop"); \ - goto *(void *)SH3_P2SEG_TO_P1SEG(p); \ - P1: (void)0; \ +/* + * Switch from P2 (uncached) back to P1 (cached). We need to be + * running on P2 to access cache control, memory-mapped cache and TLB + * arrays, etc. and after touching them at least 8 instructinos are + * necessary before jumping to P1, so provide that padding here. + */ +#define RUN_P1 \ + do { \ + register uint32_t r0 asm("r0"); \ + uint32_t pc; \ + __asm volatile( \ + /*1*/ " mov.l 1f, %1 ;" \ + /*2*/ " mova 2f, %0 ;" \ + /*3*/ " nop ;" \ + /*4*/ " and %0, %1 ;" \ + /*5*/ " nop ;" \ + /*6*/ " nop ;" \ + /*7*/ " nop ;" \ + /*8*/ " nop ;" \ + " jmp @%1 ;" \ + " nop ;" \ + " .align 2 ;" \ + "1: .long ~0x20000000;" \ + "2:;" \ + : "=r"(r0), "=r"(pc)); \ } while (0) +/* + * If RUN_P1 is the last thing we do in a function we can omit it, b/c + * we are going to return to a P1 caller anyway, but we still need to + * ensure there's at least 8 instructions before jump to P1. + */ +#define PAD_P1_SWITCH __asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop;") + #else /* __lint__ */ -#define RUN_P2 do {} while (/* CONSTCOND */ 0) -#define RUN_P1 do {} while (/* CONSTCOND */ 0) +#define RUN_P2 do {} while (/* CONSTCOND */ 0) +#define RUN_P1 do {} while (/* CONSTCOND */ 0) +#define PAD_P1_SWITCH do {} while (/* CONSTCOND */ 0) #endif #if defined(SH4)