From 1609107a4587560a3464caa759c31358d1a0ed15 Mon Sep 17 00:00:00 2001
From: François Revol
Date: Mon, 22 Oct 2007 00:48:01 +0000
Subject: [PATCH] - some swap asm.
 - 64bit atomic ops.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@22657 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 src/system/libroot/os/arch/m68k/atomic.S    | 152 +++++++++++++++++---
 src/system/libroot/os/arch/m68k/byteorder.S |  54 ++++---
 2 files changed, 170 insertions(+), 36 deletions(-)

diff --git a/src/system/libroot/os/arch/m68k/atomic.S b/src/system/libroot/os/arch/m68k/atomic.S
index bbe0c428ca..fcff4d5f45 100644
--- a/src/system/libroot/os/arch/m68k/atomic.S
+++ b/src/system/libroot/os/arch/m68k/atomic.S
@@ -52,9 +52,8 @@ miss3:	move.l	%d0,%d1
 FUNCTION(atomic_set):
 	move.l	(4,%a7),%a0
 	move.l	(%a0),%d0
-miss4:	move.l	%d0,%d1
 	move.l	(8,%a7),%d1
-	cas.l	%d0,%d1,(%a0)
+miss4:	cas.l	%d0,%d1,(%a0)
 	bne	miss4
 	rts
 
@@ -65,22 +64,143 @@ FUNCTION(atomic_test_and_set):
 	move.l	(8,%a7),%d1
 	move.l	(12,%a7),%d0
 	cas.l	%d0,%d1,(%a0)
-//XXX:	rts
-lost5:	lwarx	%r6, 0, %r3
-	cmpw	%r6, %r5
-	bne	out5
-	stwcx.	%r4, 0, %r3
-out5:	mr	%r3, %r6
-	blr
+	rts
 
 /* int atomic_get(int *value)
- *	r3
 */
 FUNCTION(atomic_get):
-lost6:	lwarx	%r5, 0, %r3
-	stwcx.	%r5, 0, %r3
-	bne-	lost6
-	mr	%r3, %r5
-	blr
+	move.l	(4,%a7),%a0
+	move.l	(%a0),%d0
+	move.l	%d0,%d1
+	// we must use cas for an atomic read: if the value still matches
+	// we store the same value back, else we get the current one anyway
+	cas.l	%d0,%d1,(%a0)
+	rts
+
+/* the m68k ELF convention returns structs through a pointer in %a0 */
+/* other conventions use d0/d1, but which one holds the MSB? */
+
+/* int64 atomic_add64(vint64 *value, int64 addValue) */
+FUNCTION(atomic_add64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	// the movem above moved the arguments 12 bytes further away
+	move.l	(16,%a7),%a2
+	// cas2 only takes plain (An) operands, so keep the address of
+	// the low long in a second register
+	lea	(4,%a2),%a1
+miss5:	// addValue, reloaded on every try since the add clobbers d2/d3
+	move.l	(24,%a7),%d2	/*LSB*/
+	move.l	(20,%a7),%d3	/*MSB*/
+	// old value
+	move.l	(%a1),%d0	/*LSB*/
+	move.l	(%a2),%d1	/*MSB*/
+	add.l	%d0,%d2
+	addx.l	%d1,%d3
+	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	bne	miss5
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
+/* int64 atomic_and64(vint64 *value, int64 andValue) */
+FUNCTION(atomic_and64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	move.l	(16,%a7),%a2
+	lea	(4,%a2),%a1
+miss6:	// andValue, reloaded on every try since the and clobbers d2/d3
+	move.l	(24,%a7),%d2	/*LSB*/
+	move.l	(20,%a7),%d3	/*MSB*/
+	// old value
+	move.l	(%a1),%d0	/*LSB*/
+	move.l	(%a2),%d1	/*MSB*/
+	and.l	%d0,%d2
+	and.l	%d1,%d3
+	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	bne	miss6
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
+/* int64 atomic_or64(vint64 *value, int64 orValue) */
+FUNCTION(atomic_or64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	move.l	(16,%a7),%a2
+	lea	(4,%a2),%a1
+miss7:	// orValue, reloaded on every try since the or clobbers d2/d3
+	move.l	(24,%a7),%d2	/*LSB*/
+	move.l	(20,%a7),%d3	/*MSB*/
+	// old value
+	move.l	(%a1),%d0	/*LSB*/
+	move.l	(%a2),%d1	/*MSB*/
+	or.l	%d0,%d2
+	or.l	%d1,%d3
+	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	bne	miss7
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
+/* int64 atomic_set64(vint64 *value, int64 newValue) */
+FUNCTION(atomic_set64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	move.l	(16,%a7),%a2
+	lea	(4,%a2),%a1
+	// new value
+	move.l	(24,%a7),%d2	/*LSB*/
+	move.l	(20,%a7),%d3	/*MSB*/
+	// old value
+	move.l	(%a1),%d0	/*LSB*/
+	move.l	(%a2),%d1	/*MSB*/
+	// a failed cas2 reloads d0/d1, so just retry until it sticks
+miss8:	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	bne	miss8
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
+/* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */
+FUNCTION(atomic_test_and_set64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	move.l	(16,%a7),%a2
+	lea	(4,%a2),%a1
+	// new value
+	move.l	(24,%a7),%d2	/*LSB*/
+	move.l	(20,%a7),%d3	/*MSB*/
+	// test against value
+	move.l	(32,%a7),%d0	/*LSB*/
+	move.l	(28,%a7),%d1	/*MSB*/
+	// a single attempt: on mismatch d0/d1 get the current value
+	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
+/* int64 atomic_get64(vint64 *value) */
+FUNCTION(atomic_get64):
+	movem.l	%d2-%d3/%a2,-(%a7)
+	move.l	(16,%a7),%a2
+	lea	(4,%a2),%a1
+	move.l	(%a1),%d0	/*LSB*/
+	move.l	(%a2),%d1	/*MSB*/
+	move.l	%d0,%d2
+	move.l	%d1,%d3
+	// we must use cas2 for an atomic 64-bit read: either we store the
+	// same value back, or we get the current one, atomically both ways
+	cas2.l	%d0:%d1,%d2:%d3,(%a1):(%a2)
+	// return value
+	move.l	%d0,(4,%a0)
+	move.l	%d1,(%a0)
+	movem.l	(%a7)+,%d2-%d3/%a2
+	rts
+
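All six 64-bit primitives build on the same compare-and-swap retry idea: read the old value, compute the new one, and let cas2.l publish it only if nobody changed the value in between; on a miss, cas2.l deposits the current value in d0/d1 and the loop tries again. In today's C11 terms, the miss5 loop of atomic_add64 corresponds to the sketch below. This is an illustration only, not part of the patch: the function name is made up, and atomic_compare_exchange_weak reloads `old` on failure just as a failed cas2.l reloads d0/d1.

#include <stdatomic.h>
#include <stdint.h>

/* sketch of the miss5 retry loop in atomic_add64 */
int64_t
atomic_add64_sketch(_Atomic int64_t *value, int64_t addValue)
{
	int64_t old = atomic_load(value);
	/* try to swap in old + addValue; on failure the current value
	 * lands in old (like d0/d1 after a failed cas2.l), so retry */
	while (!atomic_compare_exchange_weak(value, &old, old + addValue))
		;
	return old;	/* like the assembly, return the previous value */
}

The weak variant may fail spuriously, which is harmless here because the loop retries anyway.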
diff --git a/src/system/libroot/os/arch/m68k/byteorder.S b/src/system/libroot/os/arch/m68k/byteorder.S
index 8f1b1536b8..9c5d87eb75 100644
--- a/src/system/libroot/os/arch/m68k/byteorder.S
+++ b/src/system/libroot/os/arch/m68k/byteorder.S
@@ -8,38 +8,50 @@
 .text
 
 /* uint16 __swap_int16(uint16 value)
- *	r3
 */
 FUNCTION(__swap_int16):
-	rlwinm	%r4, %r3, 8, 16, 23	// byte 4 -> byte 3 (clear other bits)
-	rlwimi	%r4, %r3, 24, 24, 31	// byte 3 -> byte 4 (copy into)
-	mr	%r3, %r4		// copy to result register
-	blr
+	moveq.l	#0,%d0
+	move.b	(5,%a7),%d0	// low byte of the argument...
+	lsl.w	#8,%d0		// ...becomes the high byte of the result
+	move.b	(4,%a7),%d0	// and the high byte becomes the low one
+	rts
 
 /* uint32 __swap_int32(uint32 value)
- *	r3
 */
 FUNCTION(__swap_int32):
-	rlwinm	%r4, %r3, 24, 0, 31	// byte 4 to 1, byte 2 to 3
-	rlwimi	%r4, %r3, 8, 8, 15	// byte 3 to 2
-	rlwimi	%r4, %r3, 8, 24, 31	// byte 1 to 4
-	mr	%r3, %r4
-	blr
+	// shift the full long (lsl.l, not lsl.w), or the bytes already
+	// shifted past bit 15 would be lost; the three shifts also push
+	// the undefined upper bits out, so d0 needs no clearing first
+	move.b	(7,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(6,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(5,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(4,%a7),%d0
+	rts
 
 /* uint64 __swap_int64(uint64 value)
- *	r3/r4
 */
 FUNCTION(__swap_int64):
-	rlwinm	%r5, %r3, 24, 0, 31	// byte 4 to 5, byte 2 to 7
-	rlwimi	%r5, %r3, 8, 8, 15	// byte 3 to 6
-	rlwimi	%r5, %r3, 8, 24, 31	// byte 1 to 8
-	rlwinm	%r3, %r4, 24, 0, 31	// byte 8 to 1, byte 6 to 3
-	rlwimi	%r3, %r4, 8, 8, 15	// byte 7 to 2
-	rlwimi	%r3, %r4, 8, 24, 31	// byte 5 to 4
-	mr	%r4, %r5		// copy lower 32 bits
-	blr
+	// the halves trade places: the swapped low long of the argument
+	// becomes the high long of the result (returned through %a0)
+	move.b	(11,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(10,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(9,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(8,%a7),%d0
+	move.l	%d0,(%a0)
+	move.b	(7,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(6,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(5,%a7),%d0
+	lsl.l	#8,%d0
+	move.b	(4,%a7),%d0
+	move.l	%d0,(4,%a0)
+	rts
 
 /* TODO: The following functions can surely be optimized. A simple optimization
@@ -51,6 +63,7 @@ FUNCTION(__swap_int64):
  *	f1
  */
 FUNCTION(__swap_float):
+XXX: TODO
 	// push a stack frame
 	stwu	%r1, -32(%r1)
 	mflr	%r0
@@ -77,6 +90,7 @@ FUNCTION(__swap_float):
  *	f1
  */
 FUNCTION(__swap_double):
+XXX: TODO
 	// push a stack frame
 	stwu	%r1, -32(%r1)
 	mflr	%r0
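The swap routines all follow one pattern: pick the bytes of the argument off the stack from least to most significant and shift them in, so the result ends up with the opposite byte order. A C sketch of what __swap_int32 does is below; an illustration only, where the function name and the byte-pointer access are assumptions, and b[0] is the most significant byte on big-endian m68k, just like the byte at (4,%a7) in the assembly.

#include <stdint.h>

/* sketch of __swap_int32: shift the bytes in from the low end; the
 * shifts must cover all 32 bits (hence lsl.l, not lsl.w, in the .S),
 * otherwise bytes already shifted past bit 15 would be lost */
static uint32_t
swap_int32_sketch(uint32_t value)
{
	const uint8_t *b = (const uint8_t *)&value;
	return ((uint32_t)b[3] << 24) | ((uint32_t)b[2] << 16)
		| ((uint32_t)b[1] << 8) | (uint32_t)b[0];
}

For __swap_int64 the same swap runs over eight bytes and the two 32-bit halves trade places: the swapped low long of the argument becomes the high long of the result.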