From 6e040d83ea743f56389ce5c69e58c9ae8da3017f Mon Sep 17 00:00:00 2001 From: Ingo Weinhold Date: Sun, 11 Dec 2005 13:26:50 +0000 Subject: [PATCH] Added __swap_{float,double} for PPC. They are probably horribly suboptimal (if working at all) -- review is appreciated. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@15486 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- src/system/libroot/os/arch/ppc/byteorder.S | 61 +++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/system/libroot/os/arch/ppc/byteorder.S b/src/system/libroot/os/arch/ppc/byteorder.S index 7ad16f1d98..8f1b1536b8 100644 --- a/src/system/libroot/os/arch/ppc/byteorder.S +++ b/src/system/libroot/os/arch/ppc/byteorder.S @@ -1,5 +1,5 @@ /* -** Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved. +** Copyright 2003, Axel D�fler, axeld@pinc-software.de. All rights reserved. ** Distributed under the terms of the OpenBeOS License. */ @@ -41,3 +41,62 @@ FUNCTION(__swap_int64): mr %r4, %r5 // copy lower 32 bits blr + +/* TODO: The following functions can surely be optimized. A simple optimization + * would be to define macros with the contents of the __swap_int{32,64} + * functions and use those instead of calling the functions. 
+ */
+
+/* float __swap_float(float value): value is passed and returned in %f1.
+ * The 32 bit image of value is run through __swap_int32 and reloaded.
+ */
+FUNCTION(__swap_float):
+	// push a 32 byte frame and save LR, which the bl below overwrites
+	stwu	%r1, -32(%r1)		// previous %r1 ends up at 0(%r1)
+	mflr	%r0
+	stw		%r0, 36(%r1)		// i.e. offset 4 of the caller's frame
+
+	// %f1 -> %r3 (single precision image, via the scratch word 20(%r1))
+	stfs	%f1, 20(%r1)
+	lwz		%r3, 20(%r1)
+
+	// let __swap_int32 convert %r3
+	bl		__swap_int32
+
+	// %r3 -> %f1 (back through the same scratch word)
+	stw		%r3, 20(%r1)
+	lfs		%f1, 20(%r1)
+
+	// restore LR and pop the stack frame
+	lwz		%r0, 36(%r1)
+	mtlr	%r0
+	addi	%r1, %r1, 32
+	blr
+
+/* double __swap_double(double value): value is passed and returned in %f1.
+ * The 64 bit image of value is run through __swap_int64 and reloaded.
+ */
+FUNCTION(__swap_double):
+	// push a 32 byte frame and save LR, which the bl below overwrites
+	stwu	%r1, -32(%r1)		// previous %r1 ends up at 0(%r1)
+	mflr	%r0
+	stw		%r0, 36(%r1)		// i.e. offset 4 of the caller's frame
+
+	// %f1 -> (%r3:%r4), through the scratch doubleword 16(%r1)..23(%r1)
+	stfd	%f1, 16(%r1)		// 8 byte aligned whenever %r1 is
+	lwz		%r3, 16(%r1)		// word at the lower address
+	lwz		%r4, 20(%r1)		// word at the higher address
+
+	// let __swap_int64 convert %r3:%r4
+	bl		__swap_int64
+
+	// (%r3:%r4) -> %f1 (back through the same scratch doubleword)
+	stw		%r3, 16(%r1)
+	stw		%r4, 20(%r1)
+	lfd		%f1, 16(%r1)
+
+	// restore LR and pop the stack frame
+	lwz		%r0, 36(%r1)
+	mtlr	%r0
+	addi	%r1, %r1, 32
+	blr