From 6f14ecb4259868f79bcf9f4af89f055749b9e31f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= <revol@free.fr>
Date: Mon, 27 Jul 2009 16:27:13 +0000
Subject: [PATCH] [GSoC] [ARM] Patch by Johannes Wischert. Misc. Assembler
 functions (atomic ops, byte swapping, longjmp and libc).

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@31822 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 src/system/libroot/os/arch/arm/atomic.S       | 225 ++++++++++++++++++
 src/system/libroot/os/arch/arm/byteorder.S    |  52 ++++
 src/system/libroot/os/arch/arm/system_time.c  |  18 ++
 .../libroot/posix/arch/arm/siglongjmp.S       |  22 ++
 src/system/libroot/posix/arch/arm/sigsetjmp.S |  36 +++
 .../posix/string/arch/arm/arch_string.S       | 156 ++++++++++++
 6 files changed, 509 insertions(+)
 create mode 100644 src/system/libroot/os/arch/arm/atomic.S
 create mode 100644 src/system/libroot/os/arch/arm/byteorder.S
 create mode 100644 src/system/libroot/os/arch/arm/system_time.c
 create mode 100644 src/system/libroot/posix/arch/arm/siglongjmp.S
 create mode 100644 src/system/libroot/posix/arch/arm/sigsetjmp.S
 create mode 100644 src/system/libroot/posix/string/arch/arm/arch_string.S

diff --git a/src/system/libroot/os/arch/arm/atomic.S b/src/system/libroot/os/arch/arm/atomic.S
new file mode 100644
index 0000000000..089d85ab6c
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/atomic.S
@@ -0,0 +1,225 @@
+/*
+** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+** Distributed under the terms of the OpenBeOS License.
+*/
+
+#include <asm_defs.h>
+
+
+.text
+
+/* int atomic_add(int *value, int increment)
+ */
+FUNCTION(atomic_add):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss1:	ldrex	r12, [r0]
+	add	r2, r12, r1
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss1
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the add, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the add, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	add	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_add)
+
+
+/* int atomic_and(int *value, int andValue)
+ */
+FUNCTION(atomic_and):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss2:	ldrex	r12, [r0]
+	and	r2, r12, r1
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss2
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the and, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the and, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	and	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_and)
+
+/* int atomic_or(int *value, int orValue)
+ */
+FUNCTION(atomic_or):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss3:	ldrex	r12, [r0]
+	orr	r2, r12, r1	/* was "eor", which would have made this an atomic XOR */
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss3
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the or, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the or, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	orr	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_or)
+
+/* int atomic_set(int *value, int setTo)
+ */
+FUNCTION(atomic_set):
+miss4:	ldrex	r12, [r0]
+	strex	r3, r1, [r0]
+	teq	r3, #0
+	bne	miss4
+	mov	r0, r12		/* return the previous value, like the other atomics */
+	mov	pc, lr
+	/* TODO: this function still lacks the pre-ARMv6 fallback the others have */
+FUNCTION_END(atomic_set)
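+
+/* The ARMv6/v7 paths above all follow the same load-exclusive/store-exclusive
+ * retry pattern. Roughly, in C -- a sketch only, using hypothetical
+ * load_exclusive()/store_exclusive() helpers that stand in for the ldrex and
+ * strex instructions:
+ *
+ *	int atomic_add(volatile int *value, int increment)
+ *	{
+ *		int old;
+ *		do {
+ *			old = load_exclusive(value);	// ldrex: load and mark exclusive
+ *			// store_exclusive() returns 0 on success, non-zero if the
+ *			// reservation was lost and the store did not take place
+ *		} while (store_exclusive(value, old + increment) != 0);
+ *		return old;	// the previous value, as the BeOS atomics require
+ *	}
+ */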
+/* int atomic_test_and_set(int *value, int setTo, int testValue)
+ */
+FUNCTION(atomic_test_and_set):
+miss5:	ldrex	r12, [r0]		@ load from the address and mark it exclusive
+	cmp	r12, r2			@ compare the value with the comparand (r2)
+	bne	differ			@ not equal: clear the exclusive tag and return
+	strex	r3, r1, [r0]		@ equal: attempt to store the new value (r1)
+	cmp	r3, #1			@ check the status of the store (0 = success, 1 = failure)
+	beq	miss5			@ go back to the start if it failed
+	b	same			@ no need to clrex if strex succeeded
+differ:	clrex				@ clear the exclusive tag on the address
+same:	mov	r0, r12			@ return the previous value
+	mov	pc, lr
+FUNCTION_END(atomic_test_and_set)
+
+/* int atomic_get(int *value)
+ */
+FUNCTION(atomic_get):
+	ldr	r0, [r0]		@ a single aligned load is atomic on ARM
+	mov	pc, lr			@ TODO: needs a memory barrier for SMP
+FUNCTION_END(atomic_get)
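+
+/* atomic_test_and_set() above is a compare-and-swap. Its intended semantics,
+ * as a C sketch (the ldrex/strex loop makes the three steps one atomic unit):
+ *
+ *	int atomic_test_and_set(volatile int *value, int setTo, int testValue)
+ *	{
+ *		int old = *value;
+ *		if (old == testValue)
+ *			*value = setTo;
+ *		return old;	// caller compares against testValue to see if it won
+ *	}
+ */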
+
+/* The 64-bit atomics are not implemented yet; the commented-out code below is
+ * the m68k version, kept only as a reference for the ARM port. */
+
+/* int64 atomic_add64(vint64 *value, int64 addValue) */
+//FUNCTION(atomic_add64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// addValue
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//miss5:	// old value
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//	add.l	%d1,%d3
+//	addx.l	%d0,%d2
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	bne	miss5
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_add64)
+
+/* int64 atomic_and64(vint64 *value, int64 andValue) */
+//FUNCTION(atomic_and64):
+//FUNCTION_END(atomic_and64)
+
+/* int64 atomic_or64(vint64 *value, int64 orValue) */
+//FUNCTION(atomic_or64):
+//FUNCTION_END(atomic_or64)
+
+/* int64 atomic_set64(vint64 *value, int64 newValue) */
+//FUNCTION(atomic_set64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// new value
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//	// old value
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//miss8:	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	bne	miss8
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_set64)
+
+/* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */
+//FUNCTION(atomic_test_and_set64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// new value
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//	// test against value
+//	move.l	(20,%a7),%d1	/*LSB*/
+//	move.l	(16,%a7),%d0	/*MSB*/
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_test_and_set64)
+
+/* int64 atomic_get64(vint64 *value) */
+//FUNCTION(atomic_get64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//	move.l	%d1,%d3
+//	move.l	%d0,%d2
+//	// we must use cas... so we change to the same value if matching,
+//	// else we get the correct one anyway
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	// return value
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_get64)

diff --git a/src/system/libroot/os/arch/arm/byteorder.S b/src/system/libroot/os/arch/arm/byteorder.S
new file mode 100644
index 0000000000..ce5ccc2ffb
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/byteorder.S
@@ -0,0 +1,52 @@
+/*
+** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+** Distributed under the terms of the OpenBeOS License.
+*/
+
+#include <asm_defs.h>
+
+.text
+
+/* uint16 __swap_int16(uint16 value)
+ */
+FUNCTION(__swap_int16):
+	rev16	r0, r0
+	mov	pc, lr		/* the return was missing; the code fell through */
+FUNCTION_END(__swap_int16)
+
+/* uint32 __swap_int32(uint32 value)
+ */
+FUNCTION(__swap_int32):
+	rev	r0, r0
+	mov	pc, lr
+FUNCTION_END(__swap_int32)
+
+/* uint64 __swap_int64(uint64 value)
+ */
+FUNCTION(__swap_int64):
+	rev	r0, r0
+	rev	r1, r1
+	mov	r12, r0		/* swap the two halves: the 64-bit value lives in r0/r1 */
+	mov	r0, r1
+	mov	r1, r12
+	mov	pc, lr
+FUNCTION_END(__swap_int64)
+
+/* TODO: The following functions can surely be optimized. A simple optimization
+ * would be to define macros with the contents of the __swap_int{32,64}
+ * functions and use those instead of calling the functions.
+ */
+
+/* float __swap_float(float value)
+ */
+FUNCTION(__swap_float):
+	b	__swap_int32	/* soft-float ABI: the float is passed in r0 */
+FUNCTION_END(__swap_float)
+
+
+/* double __swap_double(double value)
+ */
+FUNCTION(__swap_double):
+	b	__swap_int64	/* was __swap_int32, which would only swap half the double */
+#warning ARM: XXX: assumes the soft-float ABI (double passed in r0/r1)
+FUNCTION_END(__swap_double)

diff --git a/src/system/libroot/os/arch/arm/system_time.c b/src/system/libroot/os/arch/arm/system_time.c
new file mode 100644
index 0000000000..7fd1fd4dd1
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/system_time.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2006, Ingo Weinhold <bonefish@cs.tu-berlin.de>.
+ * All rights reserved. Distributed under the terms of the MIT License.
+ */
+
+#include <OS.h>
+
+#include <arch_cpu.h>
+#include <libroot_private.h>
+#include <real_time_data.h>
+
+
+bigtime_t
+system_time(void)
+{
+#warning ARM:WRITEME
+	return 0;
+}

diff --git a/src/system/libroot/posix/arch/arm/siglongjmp.S b/src/system/libroot/posix/arch/arm/siglongjmp.S
new file mode 100644
index 0000000000..8ce21827fb
--- /dev/null
+++ b/src/system/libroot/posix/arch/arm/siglongjmp.S
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2005, Ingo Weinhold <bonefish@cs.tu-berlin.de>. All rights
+ * reserved. Distributed under the terms of the Haiku License.
+ */
+
+#include <asm_defs.h>
+
+//#include "setjmp_internal.h"
+
+/* int __siglongjmp(jmp_buf buffer, int value) */
+FUNCTION(siglongjmp):
+FUNCTION(longjmp):
+FUNCTION(_longjmp):
+	mov	r4, r0			/* keep the buffer; r4 is restored below anyway */
+	bl	__longjmp_return	/* restores the signal mask, returns the value (1 if it was 0) */
+	str	r0, [r4]		/* write it over the saved r0: sigsetjmp()'s return value */
+	ldmia	r4, {r0-pc}		/* restore r0-r14 and jump to the saved return address */
+FUNCTION_END(siglongjmp)
+FUNCTION_END(longjmp)
+FUNCTION_END(_longjmp)
+
+#pragma weak longjmp=siglongjmp

diff --git a/src/system/libroot/posix/arch/arm/sigsetjmp.S b/src/system/libroot/posix/arch/arm/sigsetjmp.S
new file mode 100644
index 0000000000..14bab2ef88
--- /dev/null
+++ b/src/system/libroot/posix/arch/arm/sigsetjmp.S
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2005, Ingo Weinhold <bonefish@cs.tu-berlin.de>. All rights
+ * reserved. Distributed under the terms of the Haiku License.
+ */
+
+#include <asm_defs.h>
+
+//#include "setjmp_internal.h"
+
+/* int sigsetjmp(jmp_buf buffer, int saveMask) */
+FUNCTION(__sigsetjmp):
+FUNCTION(sigsetjmp):
+	stmia	r0, {r0-r14}		/* save r0-r14 at offsets 0-56 */
+	str	lr, [r0, #60]		/* save the return address */
+	mrs	r2, cpsr		/* use r2 as scratch so r1 (saveMask) survives */
+	str	r2, [r0, #64]		/* save the status register */
+	b	__setjmp_save_sigs	/* tail call: buffer in r0, saveMask in r1; returns 0 */
+FUNCTION_END(__sigsetjmp)
+FUNCTION_END(sigsetjmp)
+
+
+/* int setjmp(jmp_buf buffer) */
+FUNCTION(setjmp):
+	mov	r1, #1			/* setjmp() saves the signal mask here: sigsetjmp(buffer, 1) */
+	b	sigsetjmp
+FUNCTION_END(setjmp)
+
+#pragma weak _setjmp=setjmp
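+
+/* The buffer layout assumed by sigsetjmp() and siglongjmp(), as a C sketch
+ * (the field names are illustrative only, not from the original patch):
+ *
+ *	struct arm_jmp_buf {
+ *		uint32 r[15];	// r0-r14, stored by "stmia r0, {r0-r14}" at offsets 0-56
+ *		uint32 pc;	// offset 60: lr at the sigsetjmp() call site;
+ *				// siglongjmp() loads it into pc via "ldmia {r0-pc}"
+ *		uint32 cpsr;	// offset 64: saved status register (currently unused
+ *				// on the restore path)
+ *	};
+ *
+ * siglongjmp() writes its return value over r[0] before the final ldmia, which
+ * is how sigsetjmp() appears to return a second time with a non-zero value.
+ */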
diff --git a/src/system/libroot/posix/string/arch/arm/arch_string.S b/src/system/libroot/posix/string/arch/arm/arch_string.S
new file mode 100644
index 0000000000..88e1bfa4af
--- /dev/null
+++ b/src/system/libroot/posix/string/arch/arm/arch_string.S
@@ -0,0 +1,156 @@
+/*
+** Copyright 2001, Travis Geiselbrecht. All rights reserved.
+** Distributed under the terms of the NewOS License.
+*/
+
+#include <asm_defs.h>
+
+//#warning ARM: optimize memcpy
+#if 1
+
+/* that should be enough for now */
+
+.align 4
+FUNCTION(memcpy):
+	// check for zero length copy or the same pointer
+	cmp	r2, #0
+	cmpne	r1, r0
+	bxeq	lr
+
+	// save a few registers for use and the return code (input dst)
+	stmfd	sp!, {r0, r4, r5, lr}
+
+	// check for forwards overlap (dst > src, distance < len)
+	subs	r3, r0, r1
+	cmpgt	r2, r3
+	bgt	.L_forwardoverlap
+
+	// check for a short copy len.
+	// 20 bytes is enough so that if a 16 byte alignment needs to happen
+	// there is at least a wordwise copy worth of work to be done.
+	cmp	r2, #(16+4)
+	blt	.L_bytewise
+
+	// see if they are similarly aligned on 4 byte boundaries
+	eor	r3, r0, r1
+	tst	r3, #3
+	bne	.L_bytewise		// dissimilarly aligned, nothing we can do (for now)
+
+	// check for 16 byte alignment on dst.
+	// this will also catch src being not 4 byte aligned, since it is
+	// similarly 4 byte aligned with dst at this point.
+	tst	r0, #15
+	bne	.L_not16bytealigned
+
+	// check to see if we have at least 32 bytes of data to copy.
+	// if not, just revert to wordwise copy
+	cmp	r2, #32
+	blt	.L_wordwise
+
+.L_bigcopy:
+	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
+	// and we need at least 32 bytes remaining to copy
+
+	// save r6-r7 for use in the big copy
+	stmfd	sp!, {r6-r7}
+
+	sub	r2, r2, #32	// subtract an extra 32 from the len so we can avoid an extra compare
+
+.L_bigcopy_loop:
+	ldmia	r1!, {r4, r5, r6, r7}
+	stmia	r0!, {r4, r5, r6, r7}
+	ldmia	r1!, {r4, r5, r6, r7}
+	subs	r2, r2, #32
+	stmia	r0!, {r4, r5, r6, r7}
+	bge	.L_bigcopy_loop
+
+	// restore r6-r7
+	ldmfd	sp!, {r6-r7}
+
+	// see if we are done
+	adds	r2, r2, #32
+	beq	.L_done
+
+	// less than 4 bytes left?
+	cmp	r2, #4
+	blt	.L_bytewise
+
+.L_wordwise:
+	// copy 4 bytes at a time.
+	// src & dst are guaranteed to be word aligned, and at least 4 bytes
+	// are left to copy.
+	subs	r2, r2, #4
+
+.L_wordwise_loop:
+	ldr	r3, [r1], #4
+	subs	r2, r2, #4
+	str	r3, [r0], #4
+	bge	.L_wordwise_loop
+
+	// correct the remaining len and test for completion
+	adds	r2, r2, #4
+	beq	.L_done
+
+.L_bytewise:
+	// simple bytewise copy
+	ldrb	r3, [r1], #1
+	subs	r2, r2, #1
+	strb	r3, [r0], #1
+	bgt	.L_bytewise
+
+.L_done:
+	// load dst for return and restore r4,r5
+//#if ARM_ARCH_LEVEL >= 5
+//	ldmfd	sp!, {r0, r4, r5, pc}
+//#else
+	ldmfd	sp!, {r0, r4, r5, lr}
+	bx	lr
+//#endif
+
+.L_not16bytealigned:
+	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
+	// src is guaranteed to be similarly word aligned with dst.
+
+	// set the condition flags based on the alignment.
+	lsl	r12, r0, #28
+	rsb	r12, r12, #0
+	msr	CPSR_f, r12		// move into NZCV fields in CPSR
+
+	// move as many bytes as necessary to get the dst aligned
+	ldrvsb	r3, [r1], #1		// V set: 1 byte
+	ldrcsh	r4, [r1], #2		// C set: 2 bytes
+	ldreq	r5, [r1], #4		// Z set: 4 bytes
+
+	strvsb	r3, [r0], #1
+	strcsh	r4, [r0], #2
+	streq	r5, [r0], #4
+
+	ldmmiia	r1!, {r3-r4}		// N set: 8 bytes
+	stmmiia	r0!, {r3-r4}
+
+	// fix the remaining len
+	sub	r2, r2, r12, lsr #28
+
+	// test to see what we should do now
+	cmp	r2, #32
+	bge	.L_bigcopy
+	b	.L_wordwise
+
+	// src and dst overlap 'forwards', i.e. dst > src
+.L_forwardoverlap:
+
+	// do a bytewise reverse copy for now
+	add	r1, r1, r2
+	add	r0, r0, r2
+
+.L_bytewisereverse:
+	// simple bytewise reverse copy; pre-indexed, since post-indexed
+	// addressing would read one byte past the end of the buffers
+	ldrb	r3, [r1, #-1]!
+	subs	r2, r2, #1
+	strb	r3, [r0, #-1]!
+	bgt	.L_bytewisereverse
+
+	b	.L_done
+
+FUNCTION_END(memcpy)
+#endif
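
The .L_not16bytealigned block above encodes the number of bytes needed to
16-byte-align dst in the CPSR's NZCV flags (V = 1 byte, C = 2 bytes, Z = 4
bytes, N = 8 bytes), so a single msr plus a ladder of predicated loads and
stores performs the whole head copy without branches. The same step in C -- a
sketch only, with illustrative names, under the assumption (already
established by the assembly) that src and dst are mutually 4-byte aligned:

	#include <stdint.h>
	#include <string.h>

	static size_t copy_alignment_head(uint8_t **dst, const uint8_t **src)
	{
		/* bytes needed to make *dst 16-byte aligned (0-15); this is the
		   value the assembly computes into the top nibble of r12 */
		size_t head = (16 - ((uintptr_t)*dst & 15)) & 15;

		if (head & 1) { **dst = **src; *dst += 1; *src += 1; }		/* V */
		if (head & 2) { memcpy(*dst, *src, 2); *dst += 2; *src += 2; }	/* C */
		if (head & 4) { memcpy(*dst, *src, 4); *dst += 4; *src += 4; }	/* Z */
		if (head & 8) { memcpy(*dst, *src, 8); *dst += 8; *src += 8; }	/* N */

		return head;	/* the assembly subtracts this from the length
				   with "sub r2, r2, r12, lsr #28" */
	}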