From 6f14ecb4259868f79bcf9f4af89f055749b9e31f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= <revol@free.fr>
Date: Mon, 27 Jul 2009 16:27:13 +0000
Subject: [PATCH] [GSoC] [ARM] Patch by Johannes Wischert. Misc. Assembler
 functions (atomic ops, byte swapping, longjmp and libc).

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@31822 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 src/system/libroot/os/arch/arm/atomic.S       | 225 ++++++++++++++++++
 src/system/libroot/os/arch/arm/byteorder.S    |  52 ++++
 src/system/libroot/os/arch/arm/system_time.c  |  18 ++
 .../libroot/posix/arch/arm/siglongjmp.S       |  22 ++
 src/system/libroot/posix/arch/arm/sigsetjmp.S |  36 +++
 .../posix/string/arch/arm/arch_string.S       | 156 ++++++++++++
 6 files changed, 509 insertions(+)
 create mode 100644 src/system/libroot/os/arch/arm/atomic.S
 create mode 100644 src/system/libroot/os/arch/arm/byteorder.S
 create mode 100644 src/system/libroot/os/arch/arm/system_time.c
 create mode 100644 src/system/libroot/posix/arch/arm/siglongjmp.S
 create mode 100644 src/system/libroot/posix/arch/arm/sigsetjmp.S
 create mode 100644 src/system/libroot/posix/string/arch/arm/arch_string.S

diff --git a/src/system/libroot/os/arch/arm/atomic.S b/src/system/libroot/os/arch/arm/atomic.S
new file mode 100644
index 0000000000..089d85ab6c
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/atomic.S
@@ -0,0 +1,225 @@
+/*
+** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+** Distributed under the terms of the OpenBeOS License.
+*/
+
+#include <asm_defs.h>
+
+
+.text
+
+/* int atomic_add(int *value, int increment)
+ */
+FUNCTION(atomic_add):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss1:	ldrex	r12, [r0]
+	add	r2, r12, r1
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss1
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the add, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the add, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	add	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_add)
+
+
+/* int atomic_and(int *value, int andValue)
+ */
+FUNCTION(atomic_and):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss2:	ldrex	r12, [r0]
+	and	r2, r12, r1
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss2
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the and, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the and, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	and	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_and)
+
+/* int atomic_or(int *value, int orValue)
+ */
+FUNCTION(atomic_or):
+#if ARM_ISA_ARMV6 || ARM_ISA_ARMV7
+miss3:	ldrex	r12, [r0]
+	orr	r2, r12, r1	/* was "eor", which would have made this an atomic XOR */
+	strex	r3, r2, [r0]
+	teq	r3, #0
+	bne	miss3
+	mov	r0, r12
+	mov	pc, lr
+#else
+	/* disable interrupts, do the or, and reenable */
+	mrs	r2, cpsr
+	mov	r12, r2
+	orr	r2, r2, #(3<<6)
+	msr	cpsr_c, r2
+
+	/* ints disabled, old cpsr state in r12 */
+
+	/* do the or, leave the previous value in r0 */
+	mov	r3, r0
+	ldr	r0, [r3]
+	orr	r2, r0, r1
+	str	r2, [r3]
+
+	/* restore interrupts and exit */
+	msr	cpsr_c, r12
+	bx	lr
+#endif
+FUNCTION_END(atomic_or)
+
+/* int atomic_set(int *value, int setTo)
+ */
+FUNCTION(atomic_set):
+miss4:	ldrex	r12, [r0]
+	strex	r3, r1, [r0]
+	teq	r3, #0
+	bne	miss4
+	mov	r0, r12		/* return the previous value, like the other atomics */
+	mov	pc, lr
+	/* TODO: this function still lacks the pre-ARMv6 fallback the others have */
+FUNCTION_END(atomic_set)
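+
+/* The ARMv6/v7 paths above all follow the same load-exclusive/store-exclusive
+ * retry pattern. Roughly, in C -- a sketch only, using hypothetical
+ * load_exclusive()/store_exclusive() helpers that stand in for the ldrex and
+ * strex instructions:
+ *
+ *	int atomic_add(volatile int *value, int increment)
+ *	{
+ *		int old;
+ *		do {
+ *			old = load_exclusive(value);	// ldrex: load and mark exclusive
+ *			// store_exclusive() returns 0 on success, non-zero if the
+ *			// reservation was lost and the store did not take place
+ *		} while (store_exclusive(value, old + increment) != 0);
+ *		return old;	// the previous value, as the BeOS atomics require
+ *	}
+ */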
+/* int atomic_test_and_set(int *value, int setTo, int testValue)
+ */
+FUNCTION(atomic_test_and_set):
+miss5:	ldrex	r12, [r0]		@ load from the address and mark it exclusive
+	cmp	r12, r2			@ compare the value with the comparand (r2)
+	bne	differ			@ not equal: clear the exclusive tag and return
+	strex	r3, r1, [r0]		@ equal: attempt to store the new value (r1)
+	cmp	r3, #1			@ check the status of the store (0 = success, 1 = failure)
+	beq	miss5			@ go back to the start if it failed
+	b	same			@ no need to clrex if strex succeeded
+differ:	clrex				@ clear the exclusive tag on the address
+same:	mov	r0, r12			@ return the previous value
+	mov	pc, lr
+FUNCTION_END(atomic_test_and_set)
+
+/* int atomic_get(int *value)
+ */
+FUNCTION(atomic_get):
+	ldr	r0, [r0]		@ a single aligned load is atomic on ARM
+	mov	pc, lr			@ TODO: needs a memory barrier for SMP
+FUNCTION_END(atomic_get)
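+
+/* atomic_test_and_set() above is a compare-and-swap. Its intended semantics,
+ * as a C sketch (the ldrex/strex loop makes the three steps one atomic unit):
+ *
+ *	int atomic_test_and_set(volatile int *value, int setTo, int testValue)
+ *	{
+ *		int old = *value;
+ *		if (old == testValue)
+ *			*value = setTo;
+ *		return old;	// caller compares against testValue to see if it won
+ *	}
+ */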
+
+/* The 64-bit atomics are not implemented yet; the commented-out code below is
+ * the m68k version, kept only as a reference for the ARM port. */
+
+/* int64 atomic_add64(vint64 *value, int64 addValue) */
+//FUNCTION(atomic_add64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// addValue
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//miss5:	// old value
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//	add.l	%d1,%d3
+//	addx.l	%d0,%d2
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	bne	miss5
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_add64)
+
+/* int64 atomic_and64(vint64 *value, int64 andValue) */
+//FUNCTION(atomic_and64):
+//FUNCTION_END(atomic_and64)
+
+/* int64 atomic_or64(vint64 *value, int64 orValue) */
+//FUNCTION(atomic_or64):
+//FUNCTION_END(atomic_or64)
+
+/* int64 atomic_set64(vint64 *value, int64 newValue) */
+//FUNCTION(atomic_set64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// new value
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//	// old value
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//miss8:	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	bne	miss8
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_set64)
+
+/* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */
+//FUNCTION(atomic_test_and_set64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	// new value
+//	move.l	(12,%a7),%d3	/*LSB*/
+//	move.l	(8,%a7),%d2	/*MSB*/
+//	// test against value
+//	move.l	(20,%a7),%d1	/*LSB*/
+//	move.l	(16,%a7),%d0	/*MSB*/
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	// return value d0:d1
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_test_and_set64)
+
+/* int64 atomic_get64(vint64 *value) */
+//FUNCTION(atomic_get64):
+//	movem.l	%d2-%d3/%a2,-(%a7)
+//	move.l	(4,%a7),%a2
+//	lea.l	(4,%a2),%a1
+//	move.l	(%a1),%d1	/*LSB*/
+//	move.l	(%a2),%d0	/*MSB*/
+//	move.l	%d1,%d3
+//	move.l	%d0,%d2
+//	// we must use cas... so we change to the same value if matching,
+//	// else we get the correct one anyway
+//	cas2.l	%d0:%d1,%d2:%d3,(%a2):(%a1)
+//	// return value
+//	movem.l	(%a7)+,%d2-%d3/%a2
+//	rts
+//FUNCTION_END(atomic_get64)

diff --git a/src/system/libroot/os/arch/arm/byteorder.S b/src/system/libroot/os/arch/arm/byteorder.S
new file mode 100644
index 0000000000..ce5ccc2ffb
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/byteorder.S
@@ -0,0 +1,52 @@
+/*
+** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
+** Distributed under the terms of the OpenBeOS License.
+*/
+
+#include <asm_defs.h>
+
+.text
+
+/* uint16 __swap_int16(uint16 value)
+ */
+FUNCTION(__swap_int16):
+	rev16	r0, r0
+	mov	pc, lr		/* the return was missing; the code fell through */
+FUNCTION_END(__swap_int16)
+
+/* uint32 __swap_int32(uint32 value)
+ */
+FUNCTION(__swap_int32):
+	rev	r0, r0
+	mov	pc, lr
+FUNCTION_END(__swap_int32)
+
+/* uint64 __swap_int64(uint64 value)
+ */
+FUNCTION(__swap_int64):
+	rev	r0, r0
+	rev	r1, r1
+	mov	r12, r0		/* swap the two halves: the 64-bit value lives in r0/r1 */
+	mov	r0, r1
+	mov	r1, r12
+	mov	pc, lr
+FUNCTION_END(__swap_int64)
+
+/* TODO: The following functions can surely be optimized. A simple optimization
+ * would be to define macros with the contents of the __swap_int{32,64}
+ * functions and use those instead of calling the functions.
+ */
+
+/* float __swap_float(float value)
+ */
+FUNCTION(__swap_float):
+	b	__swap_int32	/* soft-float ABI: the float is passed in r0 */
+FUNCTION_END(__swap_float)
+
+
+/* double __swap_double(double value)
+ */
+FUNCTION(__swap_double):
+	b	__swap_int64	/* was __swap_int32, which would only swap half the double */
+#warning ARM: XXX: assumes the soft-float ABI (double passed in r0/r1)
+FUNCTION_END(__swap_double)

diff --git a/src/system/libroot/os/arch/arm/system_time.c b/src/system/libroot/os/arch/arm/system_time.c
new file mode 100644
index 0000000000..7fd1fd4dd1
--- /dev/null
+++ b/src/system/libroot/os/arch/arm/system_time.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2006, Ingo Weinhold <bonefish@cs.tu-berlin.de>.
+ * All rights reserved. Distributed under the terms of the MIT License.
+ */
+
+#include <OS.h>
+
+#include <arch_cpu.h>
+#include <libroot_private.h>
+#include <real_time_data.h>
+
+
+bigtime_t
+system_time(void)
+{
+#warning ARM:WRITEME
+	return 0;
+}

diff --git a/src/system/libroot/posix/arch/arm/siglongjmp.S b/src/system/libroot/posix/arch/arm/siglongjmp.S
new file mode 100644
index 0000000000..8ce21827fb
--- /dev/null
+++ b/src/system/libroot/posix/arch/arm/siglongjmp.S
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2005, Ingo Weinhold <bonefish@cs.tu-berlin.de>. All rights
+ * reserved. Distributed under the terms of the Haiku License.
+ */
+
+#include <asm_defs.h>
+
+//#include "setjmp_internal.h"
+
+/* int __siglongjmp(jmp_buf buffer, int value) */
+FUNCTION(siglongjmp):
+FUNCTION(longjmp):
+FUNCTION(_longjmp):
+	mov	r4, r0			/* keep the buffer; r4 is restored below anyway */
+	bl	__longjmp_return	/* restores the signal mask, returns the value (1 if it was 0) */
+	str	r0, [r4]		/* write it over the saved r0: sigsetjmp()'s return value */
+	ldmia	r4, {r0-pc}		/* restore r0-r14 and jump to the saved return address */
+FUNCTION_END(siglongjmp)
+FUNCTION_END(longjmp)
+FUNCTION_END(_longjmp)
+
+#pragma weak longjmp=siglongjmp

diff --git a/src/system/libroot/posix/arch/arm/sigsetjmp.S b/src/system/libroot/posix/arch/arm/sigsetjmp.S
new file mode 100644
index 0000000000..14bab2ef88
--- /dev/null
+++ b/src/system/libroot/posix/arch/arm/sigsetjmp.S
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2005, Ingo Weinhold <bonefish@cs.tu-berlin.de>. All rights
+ * reserved. Distributed under the terms of the Haiku License.
+ */
+
+#include <asm_defs.h>
+
+//#include "setjmp_internal.h"
+
+/* int sigsetjmp(jmp_buf buffer, int saveMask) */
+FUNCTION(__sigsetjmp):
+FUNCTION(sigsetjmp):
+	stmia	r0, {r0-r14}		/* save r0-r14 at offsets 0-56 */
+	str	lr, [r0, #60]		/* save the return address */
+	mrs	r2, cpsr		/* use r2 as scratch so r1 (saveMask) survives */
+	str	r2, [r0, #64]		/* save the status register */
+	b	__setjmp_save_sigs	/* tail call: buffer in r0, saveMask in r1; returns 0 */
+FUNCTION_END(__sigsetjmp)
+FUNCTION_END(sigsetjmp)
+
+
+/* int setjmp(jmp_buf buffer) */
+FUNCTION(setjmp):
+	mov	r1, #1			/* setjmp() saves the signal mask here: sigsetjmp(buffer, 1) */
+	b	sigsetjmp
+FUNCTION_END(setjmp)
+
+#pragma weak _setjmp=setjmp
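+
+/* The buffer layout assumed by sigsetjmp() and siglongjmp(), as a C sketch
+ * (the field names are illustrative only, not from the original patch):
+ *
+ *	struct arm_jmp_buf {
+ *		uint32 r[15];	// r0-r14, stored by "stmia r0, {r0-r14}" at offsets 0-56
+ *		uint32 pc;	// offset 60: lr at the sigsetjmp() call site;
+ *				// siglongjmp() loads it into pc via "ldmia {r0-pc}"
+ *		uint32 cpsr;	// offset 64: saved status register (currently unused
+ *				// on the restore path)
+ *	};
+ *
+ * siglongjmp() writes its return value over r[0] before the final ldmia, which
+ * is how sigsetjmp() appears to return a second time with a non-zero value.
+ */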
diff --git a/src/system/libroot/posix/string/arch/arm/arch_string.S b/src/system/libroot/posix/string/arch/arm/arch_string.S
new file mode 100644
index 0000000000..88e1bfa4af
--- /dev/null
+++ b/src/system/libroot/posix/string/arch/arm/arch_string.S
@@ -0,0 +1,156 @@
+/*
+** Copyright 2001, Travis Geiselbrecht. All rights reserved.
+** Distributed under the terms of the NewOS License.
+*/
+
+#include <asm_defs.h>
+
+//#warning ARM: optimize memcpy
+#if 1
+
+/* that should be enough for now */
+
+.align 4
+FUNCTION(memcpy):
+	// check for zero length copy or the same pointer
+	cmp	r2, #0
+	cmpne	r1, r0
+	bxeq	lr
+
+	// save a few registers for use and the return code (input dst)
+	stmfd	sp!, {r0, r4, r5, lr}
+
+	// check for forwards overlap (dst > src, distance < len)
+	subs	r3, r0, r1
+	cmpgt	r2, r3
+	bgt	.L_forwardoverlap
+
+	// check for a short copy len.
+	// 20 bytes is enough so that if a 16 byte alignment needs to happen
+	// there is at least a wordwise copy worth of work to be done.
+	cmp	r2, #(16+4)
+	blt	.L_bytewise
+
+	// see if they are similarly aligned on 4 byte boundaries
+	eor	r3, r0, r1
+	tst	r3, #3
+	bne	.L_bytewise		// dissimilarly aligned, nothing we can do (for now)
+
+	// check for 16 byte alignment on dst.
+	// this will also catch src being not 4 byte aligned, since it is
+	// similarly 4 byte aligned with dst at this point.
+	tst	r0, #15
+	bne	.L_not16bytealigned
+
+	// check to see if we have at least 32 bytes of data to copy.
+	// if not, just revert to wordwise copy
+	cmp	r2, #32
+	blt	.L_wordwise
+
+.L_bigcopy:
+	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
+	// and we need at least 32 bytes remaining to copy
+
+	// save r6-r7 for use in the big copy
+	stmfd	sp!, {r6-r7}
+
+	sub	r2, r2, #32	// subtract an extra 32 from the len so we can avoid an extra compare
+
+.L_bigcopy_loop:
+	ldmia	r1!, {r4, r5, r6, r7}
+	stmia	r0!, {r4, r5, r6, r7}
+	ldmia	r1!, {r4, r5, r6, r7}
+	subs	r2, r2, #32
+	stmia	r0!, {r4, r5, r6, r7}
+	bge	.L_bigcopy_loop
+
+	// restore r6-r7
+	ldmfd	sp!, {r6-r7}
+
+	// see if we are done
+	adds	r2, r2, #32
+	beq	.L_done
+
+	// less than 4 bytes left?
+	cmp	r2, #4
+	blt	.L_bytewise
+
+.L_wordwise:
+	// copy 4 bytes at a time.
+	// src & dst are guaranteed to be word aligned, and at least 4 bytes
+	// are left to copy.
+	subs	r2, r2, #4
+
+.L_wordwise_loop:
+	ldr	r3, [r1], #4
+	subs	r2, r2, #4
+	str	r3, [r0], #4
+	bge	.L_wordwise_loop
+
+	// correct the remaining len and test for completion
+	adds	r2, r2, #4
+	beq	.L_done
+
+.L_bytewise:
+	// simple bytewise copy
+	ldrb	r3, [r1], #1
+	subs	r2, r2, #1
+	strb	r3, [r0], #1
+	bgt	.L_bytewise
+
+.L_done:
+	// load dst for return and restore r4,r5
+//#if ARM_ARCH_LEVEL >= 5
+//	ldmfd	sp!, {r0, r4, r5, pc}
+//#else
+	ldmfd	sp!, {r0, r4, r5, lr}
+	bx	lr
+//#endif
+
+.L_not16bytealigned:
+	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
+	// src is guaranteed to be similarly word aligned with dst.
+
+	// set the condition flags based on the alignment.
+	lsl	r12, r0, #28
+	rsb	r12, r12, #0
+	msr	CPSR_f, r12		// move into NZCV fields in CPSR
+
+	// move as many bytes as necessary to get the dst aligned
+	ldrvsb	r3, [r1], #1		// V set: 1 byte
+	ldrcsh	r4, [r1], #2		// C set: 2 bytes
+	ldreq	r5, [r1], #4		// Z set: 4 bytes
+
+	strvsb	r3, [r0], #1
+	strcsh	r4, [r0], #2
+	streq	r5, [r0], #4
+
+	ldmmiia	r1!, {r3-r4}		// N set: 8 bytes
+	stmmiia	r0!, {r3-r4}
+
+	// fix the remaining len
+	sub	r2, r2, r12, lsr #28
+
+	// test to see what we should do now
+	cmp	r2, #32
+	bge	.L_bigcopy
+	b	.L_wordwise
+
+	// src and dst overlap 'forwards', i.e. dst > src
+.L_forwardoverlap:
+
+	// do a bytewise reverse copy for now
+	add	r1, r1, r2
+	add	r0, r0, r2
+
+.L_bytewisereverse:
+	// simple bytewise reverse copy; pre-indexed, since post-indexed
+	// addressing would read one byte past the end of the buffers
+	ldrb	r3, [r1, #-1]!
+	subs	r2, r2, #1
+	strb	r3, [r0, #-1]!
+	bgt	.L_bytewisereverse
+
+	b	.L_done
+
+FUNCTION_END(memcpy)
+#endif
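
The .L_not16bytealigned block above encodes the number of bytes needed to
16-byte-align dst in the CPSR's NZCV flags (V = 1 byte, C = 2 bytes, Z = 4
bytes, N = 8 bytes), so a single msr plus a ladder of predicated loads and
stores performs the whole head copy without branches. The same step in C -- a
sketch only, with illustrative names, under the assumption (already
established by the assembly) that src and dst are mutually 4-byte aligned:

	#include <stdint.h>
	#include <string.h>

	static size_t copy_alignment_head(uint8_t **dst, const uint8_t **src)
	{
		/* bytes needed to make *dst 16-byte aligned (0-15); this is the
		   value the assembly computes into the top nibble of r12 */
		size_t head = (16 - ((uintptr_t)*dst & 15)) & 15;

		if (head & 1) { **dst = **src; *dst += 1; *src += 1; }		/* V */
		if (head & 2) { memcpy(*dst, *src, 2); *dst += 2; *src += 2; }	/* C */
		if (head & 4) { memcpy(*dst, *src, 4); *dst += 4; *src += 4; }	/* Z */
		if (head & 8) { memcpy(*dst, *src, 8); *dst += 8; *src += 8; }	/* N */

		return head;	/* the assembly subtracts this from the length
				   with "sub r2, r2, r12, lsr #28" */
	}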