From: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Subject: [PATCH 2/2] Replace ARM arithmetic support routines
with EDK2 versions.

Replace the incomplete, GPL-licensed ARM arithmetic support routines with
the ones from the EDK2 project. These also cover long long multiplication
and long long logical shifts.

Also remove the special case for small dividends in DivU64x32: we can
simply let the compiler handle this, and emit calls to the support
routines where appropriate.
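
For illustration only (this snippet is not part of the patch, and DivExample
is a hypothetical name): with the special case gone, a plain C division is
all DivU64x32 needs. For a non-constant divisor on 32-bit ARM, GCC lowers it
to a call to __aeabi_uldivmod, which lib/arm/uldiv.S below provides; for a
constant divisor it still emits a multiply-and-shift sequence.

UINT64 DivExample(UINT64 Dividend, UINTN Divisor)
{
    /* Non-constant Divisor: the compiler emits a call to __aeabi_uldivmod.
     * Constant Divisor: folded into a multiply and shift, no library call. */
    return Dividend / Divisor;
}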

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Nigel Croxon <nigel.croxon@hpe.com>
commit d119b0d759 (parent 88e3cf9535)
Nigel Croxon, 2015-12-23 08:33:02 -05:00
---
 11 files changed, 608 insertions(+), 480 deletions(-)

@@ -125,13 +125,8 @@ typedef uint32_t UINTN;
 #define uefi_call_wrapper(func, va_num, ...) func(__VA_ARGS__)
 #define EFI_FUNCTION
 
-extern UINT64 __DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder);
-
 static inline UINT64 DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder)
 {
-    if (Dividend >> 32)
-        return __DivU64x32(Dividend, Divisor, Remainder);
-
     /*
      * GCC turns a division into a multiplication and shift with precalculated
      * constants if the divisor is constant and the dividend fits into a 32 bit
@@ -139,7 +134,7 @@ static inline UINT64 DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder)
      * library functions.
      */
     if (Remainder)
-        *Remainder = (UINTN)Dividend % Divisor;
-    Dividend = (UINTN)Dividend / Divisor;
+        *Remainder = Dividend % Divisor;
+    Dividend = Dividend / Divisor;
     return Dividend;
 }

@@ -58,7 +58,8 @@ FILES += $(ARCH)/callwrap $(ARCH)/efi_stub
 endif
 
 ifeq ($(ARCH),arm)
-FILES += $(ARCH)/lib1funcs $(ARCH)/div64
+FILES += $(ARCH)/uldiv $(ARCH)/ldivmod $(ARCH)/div $(ARCH)/llsl $(ARCH)/llsr \
+        $(ARCH)/mullu
 endif
 
 OBJS = $(FILES:%=%.o)

lib/arm/div.S (new file, 155 lines)
@@ -0,0 +1,155 @@
#------------------------------------------------------------------------------
#
# Copyright (c) 2011, ARM. All rights reserved.<BR>
#
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
#include "edk2asm.h"
.text
.align 2
GCC_ASM_EXPORT(__aeabi_uidiv)
GCC_ASM_EXPORT(__aeabi_uidivmod)
GCC_ASM_EXPORT(__aeabi_idiv)
GCC_ASM_EXPORT(__aeabi_idivmod)
# AREA Math, CODE, READONLY
#
#UINT32
#EFIAPI
#__aeabi_uidivmod (
# IN UINT32 Dividend
# IN UINT32 Divisor
# );
#
ASM_PFX(__aeabi_uidiv):
ASM_PFX(__aeabi_uidivmod):
rsbs r12, r1, r0, LSR #4
mov r2, #0
bcc ASM_PFX(__arm_div4)
rsbs r12, r1, r0, LSR #8
bcc ASM_PFX(__arm_div8)
mov r3, #0
b ASM_PFX(__arm_div_large)
#
#INT32
#EFIAPI
#__aeabi_idivmod (
# IN INT32 Dividend
# IN INT32 Divisor
# );
#
ASM_PFX(__aeabi_idiv):
ASM_PFX(__aeabi_idivmod):
orrs r12, r0, r1
bmi ASM_PFX(__arm_div_negative)
rsbs r12, r1, r0, LSR #1
mov r2, #0
bcc ASM_PFX(__arm_div1)
rsbs r12, r1, r0, LSR #4
bcc ASM_PFX(__arm_div4)
rsbs r12, r1, r0, LSR #8
bcc ASM_PFX(__arm_div8)
mov r3, #0
b ASM_PFX(__arm_div_large)
ASM_PFX(__arm_div8):
rsbs r12, r1, r0, LSR #7
subcs r0, r0, r1, LSL #7
adc r2, r2, r2
rsbs r12, r1, r0, LSR #6
subcs r0, r0, r1, LSL #6
adc r2, r2, r2
rsbs r12, r1, r0, LSR #5
subcs r0, r0, r1, LSL #5
adc r2, r2, r2
rsbs r12, r1, r0, LSR #4
subcs r0, r0, r1, LSL #4
adc r2, r2, r2
ASM_PFX(__arm_div4):
rsbs r12, r1, r0, LSR #3
subcs r0, r0, r1, LSL #3
adc r2, r2, r2
rsbs r12, r1, r0, LSR #2
subcs r0, r0, r1, LSL #2
adcs r2, r2, r2
rsbs r12, r1, r0, LSR #1
subcs r0, r0, r1, LSL #1
adc r2, r2, r2
ASM_PFX(__arm_div1):
subs r1, r0, r1
movcc r1, r0
adc r0, r2, r2
bx r14
ASM_PFX(__arm_div_negative):
ands r2, r1, #0x80000000
rsbmi r1, r1, #0
eors r3, r2, r0, ASR #32
rsbcs r0, r0, #0
rsbs r12, r1, r0, LSR #4
bcc label1
rsbs r12, r1, r0, LSR #8
bcc label2
ASM_PFX(__arm_div_large):
lsl r1, r1, #6
rsbs r12, r1, r0, LSR #8
orr r2, r2, #0xfc000000
bcc label2
lsl r1, r1, #6
rsbs r12, r1, r0, LSR #8
orr r2, r2, #0x3f00000
bcc label2
lsl r1, r1, #6
rsbs r12, r1, r0, LSR #8
orr r2, r2, #0xfc000
orrcs r2, r2, #0x3f00
lslcs r1, r1, #6
rsbs r12, r1, #0
bcs ASM_PFX(__aeabi_idiv0)
label3:
lsrcs r1, r1, #6
label2:
rsbs r12, r1, r0, LSR #7
subcs r0, r0, r1, LSL #7
adc r2, r2, r2
rsbs r12, r1, r0, LSR #6
subcs r0, r0, r1, LSL #6
adc r2, r2, r2
rsbs r12, r1, r0, LSR #5
subcs r0, r0, r1, LSL #5
adc r2, r2, r2
rsbs r12, r1, r0, LSR #4
subcs r0, r0, r1, LSL #4
adc r2, r2, r2
label1:
rsbs r12, r1, r0, LSR #3
subcs r0, r0, r1, LSL #3
adc r2, r2, r2
rsbs r12, r1, r0, LSR #2
subcs r0, r0, r1, LSL #2
adcs r2, r2, r2
bcs label3
rsbs r12, r1, r0, LSR #1
subcs r0, r0, r1, LSL #1
adc r2, r2, r2
subs r1, r0, r1
movcc r1, r0
adc r0, r2, r2
asrs r3, r3, #31
rsbmi r0, r0, #0
rsbcs r1, r1, #0
bx r14
@ What to do about division by zero? For now, just return.
ASM_PFX(__aeabi_idiv0):
bx r14
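
For reference, a minimal C model of what the 32-bit helpers above compute
(a sketch, not part of the patch; the model_* names are illustrative).
__aeabi_uidivmod returns the quotient in r0 and the remainder in r1, so a
struct stands in for that register pair here. The signed entry points take
the __arm_div_negative path, which makes the operands positive and patches
the signs back onto the results at the end.

typedef struct {
    unsigned int quot;   /* r0 on return */
    unsigned int rem;    /* r1 on return */
} model_uidiv_t;

static model_uidiv_t model_aeabi_uidivmod(unsigned int num, unsigned int den)
{
    /* den != 0 assumed; the assembly branches to __aeabi_idiv0 for a zero
     * divisor, which (for now) simply returns. */
    model_uidiv_t r;
    r.quot = num / den;              /* built bit by bit by the shift/subtract code */
    r.rem  = num - r.quot * den;
    return r;
}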

lib/arm/div64.S (deleted file, 193 lines)
@@ -1,193 +0,0 @@
/*
* linux/arch/arm/lib/div64.S
*
* Optimized computation of 64-bit dividend / 32-bit divisor
*
* Author: Nicolas Pitre
* Created: Oct 5, 2003
* Copyright: Monta Vista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#define UNWIND(x...)
#define ARM(x...) x
#define THUMB(x...)
#define ENTRY(__f) \
.align 3 ;\
.globl __f ;\
.type __f,%function ;\
__f:
#define ENDPROC(__f) ;\
.size __f, . - __f
/*
UINT64
DivU64x32 (
IN UINT64 Dividend,
IN UINTN Divisor,
OUT UINTN *Remainder OPTIONAL
)
// divide 64bit by 32bit and get a 64bit result
// N.B. only works for 31bit divisors!!
{
}
*/
ENTRY(__DivU64x32)
stmfd sp!, {r4-r6, lr}
mov r5, r4 @ preserve Remainder
mov r4, r2 @ divisor in r4
bl __do_div64
teq r5, #0
strne xh, [r5]
mov r0, yl
mov r1, yh
ldmfd sp!, {r4-r6, pc}
ENDPROC(__DivU64x32)
/*
* __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
*
* Note: Calling convention is totally non standard for optimal code.
* This is meant to be used by do_div() from include/asm/div64.h only.
*
* Input parameters:
* xh-xl = dividend (clobbered)
* r4 = divisor (preserved)
*
* Output values:
* yh-yl = result
* xh = remainder
*
* Clobbered regs: xl, ip
*/
ENTRY(__do_div64)
UNWIND(.fnstart)
@ Test for easy paths first.
subs ip, r4, #1
bls 9f @ divisor is 0 or 1
tst ip, r4
beq 8f @ divisor is power of 2
@ See if we need to handle upper 32-bit result.
cmp xh, r4
mov yh, #0
blo 3f
@ Align divisor with upper part of dividend.
@ The aligned divisor is stored in yl preserving the original.
@ The bit position is stored in ip.
clz yl, r4
clz ip, xh
sub yl, yl, ip
mov ip, #1
mov ip, ip, lsl yl
mov yl, r4, lsl yl
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
orrcs yh, yh, ip
subcss xh, xh, yl
movnes ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
cmpeq xl, r4
movlo xh, xl
movlo pc, lr
@ The division loop for lower bit positions.
@ Here we shift remainder bits leftwards rather than moving the
@ divisor for comparisons, considering the carry-out bit as well.
mov ip, #0x80000000
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
cmpcc xh, r4
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
mov pc, lr
@ The top part of remainder became zero. If carry is set
@ (the 33rd bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
moveq pc, lr
@ We still have remainder bits in the low part. Bring them up.
clz xh, xl @ we know xh is zero here so...
add xh, xh, #1
mov xl, xl, lsl xh
mov ip, ip, lsr xh
@ Current remainder is now 1. It is worthless to compare with
@ divisor at this point since divisor can not be smaller than 3 here.
@ If possible, branch for another shift in the division loop.
@ If no bit position left then we are done.
movs ip, ip, lsr #1
mov xh, #1
bne 4b
mov pc, lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
clz ip, r4
rsb ip, ip, #31
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
ARM( orr yl, yl, xh, lsl ip )
THUMB( lsl xh, xh, ip )
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
UNWIND(.fnend)
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
@ Division by 0:
str lr, [sp, #-8]!
bl __div0
@ as wrong as it could be...
mov yl, #0
mov yh, #0
mov xh, #0
ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(__do_div64)

lib/arm/edk2asm.h (new file, 6 lines)
@@ -0,0 +1,6 @@
#define ASM_PFX(x) x
#define GCC_ASM_EXPORT(x) \
.globl x ; \
.type x, %function

lib/arm/ldivmod.S (new file, 61 lines)
@@ -0,0 +1,61 @@
//------------------------------------------------------------------------------
//
// Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
//
// This program and the accompanying materials
// are licensed and made available under the terms and conditions of the BSD License
// which accompanies this distribution. The full text of the license may be found at
// http://opensource.org/licenses/bsd-license.php
//
// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
//
//------------------------------------------------------------------------------
#include "edk2asm.h"
.text
.align 2
GCC_ASM_EXPORT(__aeabi_ldivmod)
//
// A pair of (unsigned) long longs is returned in {{r0, r1}, {r2, r3}},
// the quotient in {r0, r1}, and the remainder in {r2, r3}.
//
//__value_in_regs lldiv_t
//EFIAPI
//__aeabi_ldivmod (
// IN UINT64 Dividend
// IN UINT64 Divisor
// )
//
ASM_PFX(__aeabi_ldivmod):
push {r4,lr}
asrs r4,r1,#1
eor r4,r4,r3,LSR #1
bpl L_Test1
rsbs r0,r0,#0
rsc r1,r1,#0
L_Test1:
tst r3,r3
bpl L_Test2
rsbs r2,r2,#0
rsc r3,r3,#0
L_Test2:
bl ASM_PFX(__aeabi_uldivmod)
tst r4,#0x40000000
beq L_Test3
rsbs r0,r0,#0
rsc r1,r1,#0
L_Test3:
tst r4,#0x80000000
beq L_Exit
rsbs r2,r2,#0
rsc r3,r3,#0
L_Exit:
pop {r4,pc}
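
For reference, a minimal C model of the sign handling above (a sketch, not
part of the patch; the model_* names are illustrative): the operands are made
positive, __aeabi_uldivmod performs the unsigned division, and the flags saved
in r4 decide what to negate afterwards. The quotient is negated when the
operand signs differ; the remainder takes the sign of the dividend.

typedef struct {
    long long quot;   /* {r0, r1} on return */
    long long rem;    /* {r2, r3} on return */
} model_ldiv_t;

static model_ldiv_t model_aeabi_ldivmod(long long num, long long den)
{
    int negate_quot = (num < 0) != (den < 0);   /* bit 0x40000000 of r4 */
    int negate_rem  = (num < 0);                /* bit 0x80000000 of r4 */
    unsigned long long un = num < 0 ? 0ULL - (unsigned long long)num : (unsigned long long)num;
    unsigned long long ud = den < 0 ? 0ULL - (unsigned long long)den : (unsigned long long)den;
    unsigned long long q = un / ud;             /* done by __aeabi_uldivmod */
    unsigned long long r = un - q * ud;
    model_ldiv_t out;
    out.quot = negate_quot ? -(long long)q : (long long)q;  /* LLONG_MIN edge ignored */
    out.rem  = negate_rem  ? -(long long)r : (long long)r;
    return out;
}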

lib/arm/lib1funcs.S (deleted file, 279 lines)
@@ -1,279 +0,0 @@
/*
* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
*
* Author: Nicolas Pitre <nico@fluxnic.net>
* - contributed to gcc-3.4 on Sep 30, 2003
* - adapted for the Linux kernel on Oct 2, 2003
*/
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#define UNWIND(x...)
#define ENTRY(__f) \
.align 3 ;\
.globl __f ;\
.type __f,%function ;\
__f:
#define ENDPROC(__f) ;\
.size __f, . - __f
.macro ARM_DIV_BODY dividend, divisor, result, curbit
clz \curbit, \divisor
clz \result, \dividend
sub \result, \curbit, \result
mov \curbit, #1
mov \divisor, \divisor, lsl \result
mov \curbit, \curbit, lsl \result
mov \result, #0
@ Division loop
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
cmp \dividend, #0 @ Early termination?
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
movne \divisor, \divisor, lsr #4
bne 1b
.endm
.macro ARM_DIV2_ORDER divisor, order
clz \order, \divisor
rsb \order, \order, #31
.endm
.macro ARM_MOD_BODY dividend, divisor, order, spare
clz \order, \divisor
clz \spare, \dividend
sub \order, \order, \spare
mov \divisor, \divisor, lsl \order
@ Perform all needed subtractions to keep only the remainder.
@ Do comparisons in batches of 4 first.
subs \order, \order, #3 @ yes, 3 is intended here
blt 2f
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
subges \order, \order, #4
bge 1b
tst \order, #3
teqne \dividend, #0
beq 5f
@ Either 1, 2 or 3 comparison/subtractions are left.
2: cmn \order, #2
blt 4f
beq 3f
cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
5:
.endm
ENTRY(__aeabi_uidiv)
ENTRY(__udivsi3)
subs r2, r1, #1
moveq pc, lr
bcc Ldiv0
cmp r0, r1
bls 11f
tst r1, r2
beq 12f
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
mov pc, lr
11: moveq r0, #1
movne r0, #0
mov pc, lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
mov pc, lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
ENTRY(__umodsi3)
UNWIND(.fnstart)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
movls pc, lr
ARM_MOD_BODY r0, r1, r2, r3
mov pc, lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq Ldiv0
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
tst r1, r2 @ divisor is power of 2 ?
beq 12f
ARM_DIV_BODY r3, r1, r0, r2
cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
mov pc, lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
mov pc, lr
UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
ENTRY(__modsi3)
UNWIND(.fnstart)
cmp r1, #0
beq Ldiv0
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
bls 10f
ARM_MOD_BODY r0, r1, r2, r3
10: cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
UNWIND(.fnend)
ENDPROC(__modsi3)
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_uidiv
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_idiv
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
str lr, [sp, #-8]!
bl __div0
mov r0, #0 @ About as wrong as it could be.
ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)

lib/arm/llsl.S (new file, 41 lines)
@@ -0,0 +1,41 @@
#------------------------------------------------------------------------------
#
# Copyright (c) 2013, ARM. All rights reserved.<BR>
#
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
#include "edk2asm.h"
.text
.align 2
GCC_ASM_EXPORT(__aeabi_llsl)
#
#INT64
#EFIAPI
#__aeabi_llsl (
# IN INT64 Value,
# IN INT32 Shift
# );
#
ASM_PFX(__aeabi_llsl):
subs r3,r2,#0x20
bpl 1f
rsb r3,r2,#0x20
lsl r1,r1,r2
orr r1,r1,r0,lsr r3
lsl r0,r0,r2
bx lr
1:
lsl r1,r0,r3
mov r0,#0
bx lr
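
For reference, a C sketch of __aeabi_llsl above (not part of the patch; the
model_* name is illustrative). The EABI passes the 64-bit value in r0 (low
word) and r1 (high word) with the shift count in r2, so the shift is stitched
together from the two 32-bit halves; counts of 32 or more take the second path.

/* Valid for shift counts 0..63, matching what the assembly handles. */
static unsigned long long model_aeabi_llsl(unsigned long long value, unsigned int n)
{
    unsigned int lo = (unsigned int)value;
    unsigned int hi = (unsigned int)(value >> 32);
    if (n < 32) {
        hi = (hi << n) | (n ? lo >> (32 - n) : 0);
        lo <<= n;
    } else {
        hi = lo << (n - 32);
        lo = 0;
    }
    return ((unsigned long long)hi << 32) | lo;
}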

lib/arm/llsr.S (new file, 41 lines)
@@ -0,0 +1,41 @@
#------------------------------------------------------------------------------
#
# Copyright (c) 2013, ARM. All rights reserved.<BR>
#
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
#include "edk2asm.h"
.text
.align 2
GCC_ASM_EXPORT(__aeabi_llsr)
#
#UINT64
#EFIAPI
#__aeabi_llsr (
# IN UINT64 Value,
# IN INT32 Shift
# );
#
ASM_PFX(__aeabi_llsr):
subs r3,r2,#0x20
bpl 1f
rsb r3,r2,#0x20
lsr r0,r0,r2
orr r0,r0,r1,lsl r3
lsr r1,r1,r2
bx lr
1:
lsr r0,r1,r3
mov r1,#0
bx lr
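
The counterpart sketch for __aeabi_llsr above (not part of the patch): a
64-bit logical shift right built from the 32-bit halves in the same way.

static unsigned long long model_aeabi_llsr(unsigned long long value, unsigned int n)
{
    unsigned int lo = (unsigned int)value;
    unsigned int hi = (unsigned int)(value >> 32);
    if (n < 32) {
        lo = (lo >> n) | (n ? hi << (32 - n) : 0);
        hi >>= n;
    } else {
        lo = hi >> (n - 32);
        hi = 0;
    }
    return ((unsigned long long)hi << 32) | lo;
}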

lib/arm/mullu.S (new file, 33 lines)
@@ -0,0 +1,33 @@
#------------------------------------------------------------------------------
#
# Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
#
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
#------------------------------------------------------------------------------
#include "edk2asm.h"
.text
GCC_ASM_EXPORT(__aeabi_lmul)
#
#INT64
#EFIAPI
#__aeabi_lmul (
# IN INT64 Multiplicand
# IN INT64 Multiplier
# );
#
ASM_PFX(__aeabi_lmul):
stmdb sp!, {lr}
mov lr, r0
umull r0, ip, r2, lr
mla r1, r2, r1, ip
mla r1, r3, lr, r1
ldmia sp!, {pc}
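
For reference, a C sketch of the umull/mla sequence above (not part of the
patch; the model_* name is illustrative). Only the low 64 bits of the 64x64
product are returned, so the high-times-high partial product can be dropped,
and signed and unsigned multiplication agree on those low 64 bits.

static unsigned long long model_aeabi_lmul(unsigned long long a, unsigned long long b)
{
    unsigned int a_lo = (unsigned int)a, a_hi = (unsigned int)(a >> 32);
    unsigned int b_lo = (unsigned int)b, b_hi = (unsigned int)(b >> 32);
    unsigned long long low = (unsigned long long)a_lo * b_lo;  /* umull r0, ip, r2, lr */
    unsigned int high = (unsigned int)(low >> 32)
                      + a_hi * b_lo                            /* mla r1, r2, r1, ip   */
                      + a_lo * b_hi;                           /* mla r1, r3, lr, r1   */
    return ((unsigned long long)high << 32) | (unsigned int)low;
}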

lib/arm/uldiv.S (new file, 267 lines)
@@ -0,0 +1,267 @@
//------------------------------------------------------------------------------
//
// Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
//
// This program and the accompanying materials
// are licensed and made available under the terms and conditions of the BSD License
// which accompanies this distribution. The full text of the license may be found at
// http://opensource.org/licenses/bsd-license.php
//
// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
//
//------------------------------------------------------------------------------
#include "edk2asm.h"
.text
.align 2
GCC_ASM_EXPORT(__aeabi_uldivmod)
//
//UINT64
//EFIAPI
//__aeabi_uldivmod (
// IN UINT64 Dividend
// IN UINT64 Divisor
// )
//
ASM_PFX(__aeabi_uldivmod):
stmdb sp!, {r4, r5, r6, lr}
mov r4, r1
mov r5, r0
mov r6, #0 // 0x0
orrs ip, r3, r2, lsr #31
bne ASM_PFX(__aeabi_uldivmod_label1)
tst r2, r2
beq ASM_PFX(_ll_div0)
movs ip, r2, lsr #15
addeq r6, r6, #16 // 0x10
mov ip, r2, lsl r6
movs lr, ip, lsr #23
moveq ip, ip, lsl #8
addeq r6, r6, #8 // 0x8
movs lr, ip, lsr #27
moveq ip, ip, lsl #4
addeq r6, r6, #4 // 0x4
movs lr, ip, lsr #29
moveq ip, ip, lsl #2
addeq r6, r6, #2 // 0x2
movs lr, ip, lsr #30
moveq ip, ip, lsl #1
addeq r6, r6, #1 // 0x1
b ASM_PFX(_ll_udiv_small)
ASM_PFX(__aeabi_uldivmod_label1):
tst r3, #-2147483648 // 0x80000000
bne ASM_PFX(__aeabi_uldivmod_label2)
movs ip, r3, lsr #15
addeq r6, r6, #16 // 0x10
mov ip, r3, lsl r6
movs lr, ip, lsr #23
moveq ip, ip, lsl #8
addeq r6, r6, #8 // 0x8
movs lr, ip, lsr #27
moveq ip, ip, lsl #4
addeq r6, r6, #4 // 0x4
movs lr, ip, lsr #29
moveq ip, ip, lsl #2
addeq r6, r6, #2 // 0x2
movs lr, ip, lsr #30
addeq r6, r6, #1 // 0x1
rsb r3, r6, #32 // 0x20
moveq ip, ip, lsl #1
orr ip, ip, r2, lsr r3
mov lr, r2, lsl r6
b ASM_PFX(_ll_udiv_big)
ASM_PFX(__aeabi_uldivmod_label2):
mov ip, r3
mov lr, r2
b ASM_PFX(_ll_udiv_ginormous)
ASM_PFX(_ll_udiv_small):
cmp r4, ip, lsl #1
mov r3, #0 // 0x0
subcs r4, r4, ip, lsl #1
addcs r3, r3, #2 // 0x2
cmp r4, ip
subcs r4, r4, ip
adcs r3, r3, #0 // 0x0
add r2, r6, #32 // 0x20
cmp r2, #32 // 0x20
rsb ip, ip, #0 // 0x0
bcc ASM_PFX(_ll_udiv_small_label1)
orrs r0, r4, r5, lsr #30
moveq r4, r5
moveq r5, #0 // 0x0
subeq r2, r2, #32 // 0x20
ASM_PFX(_ll_udiv_small_label1):
mov r1, #0 // 0x0
cmp r2, #16 // 0x10
bcc ASM_PFX(_ll_udiv_small_label2)
movs r0, r4, lsr #14
moveq r4, r4, lsl #16
addeq r1, r1, #16 // 0x10
ASM_PFX(_ll_udiv_small_label2):
sub lr, r2, r1
cmp lr, #8 // 0x8
bcc ASM_PFX(_ll_udiv_small_label3)
movs r0, r4, lsr #22
moveq r4, r4, lsl #8
addeq r1, r1, #8 // 0x8
ASM_PFX(_ll_udiv_small_label3):
rsb r0, r1, #32 // 0x20
sub r2, r2, r1
orr r4, r4, r5, lsr r0
mov r5, r5, lsl r1
cmp r2, #1 // 0x1
bcc ASM_PFX(_ll_udiv_small_label5)
sub r2, r2, #1 // 0x1
and r0, r2, #7 // 0x7
eor r0, r0, #7 // 0x7
adds r0, r0, r0, lsl #1
add pc, pc, r0, lsl #2
nop // (mov r0,r0)
ASM_PFX(_ll_udiv_small_label4):
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
rsbcc r4, ip, r4
adcs r5, r5, r5
adcs r4, ip, r4, lsl #1
sub r2, r2, #8 // 0x8
tst r2, r2
rsbcc r4, ip, r4
bpl ASM_PFX(_ll_udiv_small_label4)
ASM_PFX(_ll_udiv_small_label5):
mov r2, r4, lsr r6
bic r4, r4, r2, lsl r6
adcs r0, r5, r5
adc r1, r4, r4
add r1, r1, r3, lsl r6
mov r3, #0 // 0x0
ldmia sp!, {r4, r5, r6, pc}
ASM_PFX(_ll_udiv_big):
subs r0, r5, lr
mov r3, #0 // 0x0
sbcs r1, r4, ip
movcs r5, r0
movcs r4, r1
adcs r3, r3, #0 // 0x0
subs r0, r5, lr
sbcs r1, r4, ip
movcs r5, r0
movcs r4, r1
adcs r3, r3, #0 // 0x0
subs r0, r5, lr
sbcs r1, r4, ip
movcs r5, r0
movcs r4, r1
adcs r3, r3, #0 // 0x0
mov r1, #0 // 0x0
rsbs lr, lr, #0 // 0x0
rsc ip, ip, #0 // 0x0
cmp r6, #16 // 0x10
bcc ASM_PFX(_ll_udiv_big_label1)
movs r0, r4, lsr #14
moveq r4, r4, lsl #16
addeq r1, r1, #16 // 0x10
ASM_PFX(_ll_udiv_big_label1):
sub r2, r6, r1
cmp r2, #8 // 0x8
bcc ASM_PFX(_ll_udiv_big_label2)
movs r0, r4, lsr #22
moveq r4, r4, lsl #8
addeq r1, r1, #8 // 0x8
ASM_PFX(_ll_udiv_big_label2):
rsb r0, r1, #32 // 0x20
sub r2, r6, r1
orr r4, r4, r5, lsr r0
mov r5, r5, lsl r1
cmp r2, #1 // 0x1
bcc ASM_PFX(_ll_udiv_big_label4)
sub r2, r2, #1 // 0x1
and r0, r2, #3 // 0x3
rsb r0, r0, #3 // 0x3
adds r0, r0, r0, lsl #1
add pc, pc, r0, lsl #3
nop // (mov r0,r0)
ASM_PFX(_ll_udiv_big_label3):
adcs r5, r5, r5
adcs r4, r4, r4
adcs r0, lr, r5
adcs r1, ip, r4
movcs r5, r0
movcs r4, r1
adcs r5, r5, r5
adcs r4, r4, r4
adcs r0, lr, r5
adcs r1, ip, r4
movcs r5, r0
movcs r4, r1
adcs r5, r5, r5
adcs r4, r4, r4
adcs r0, lr, r5
adcs r1, ip, r4
movcs r5, r0
movcs r4, r1
sub r2, r2, #4 // 0x4
adcs r5, r5, r5
adcs r4, r4, r4
adcs r0, lr, r5
adcs r1, ip, r4
tst r2, r2
movcs r5, r0
movcs r4, r1
bpl ASM_PFX(_ll_udiv_big_label3)
ASM_PFX(_ll_udiv_big_label4):
mov r1, #0 // 0x0
mov r2, r5, lsr r6
bic r5, r5, r2, lsl r6
adcs r0, r5, r5
adc r1, r1, #0 // 0x0
movs lr, r3, lsl r6
mov r3, r4, lsr r6
bic r4, r4, r3, lsl r6
adc r1, r1, #0 // 0x0
adds r0, r0, lr
orr r2, r2, r4, ror r6
adc r1, r1, #0 // 0x0
ldmia sp!, {r4, r5, r6, pc}
ASM_PFX(_ll_udiv_ginormous):
subs r2, r5, lr
mov r1, #0 // 0x0
sbcs r3, r4, ip
adc r0, r1, r1
movcc r2, r5
movcc r3, r4
ldmia sp!, {r4, r5, r6, pc}
ASM_PFX(_ll_div0):
ldmia sp!, {r4, r5, r6, lr}
mov r0, #0 // 0x0
mov r1, #0 // 0x0
b ASM_PFX(__aeabi_ldiv0)
ASM_PFX(__aeabi_ldiv0):
bx r14
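
For reference, a minimal C model of what __aeabi_uldivmod above computes (a
sketch, not part of the patch; the model_* names are illustrative). The
quotient is returned in {r0, r1} and the remainder in {r2, r3}; the
_ll_udiv_small, _ll_udiv_big and _ll_udiv_ginormous paths are different
normalisations of the same shift-and-subtract long division, and a zero
divisor ends up in __aeabi_ldiv0, which simply returns.

typedef struct {
    unsigned long long quot;   /* {r0, r1} on return */
    unsigned long long rem;    /* {r2, r3} on return */
} model_uldiv_t;

static model_uldiv_t model_aeabi_uldivmod(unsigned long long num, unsigned long long den)
{
    /* den != 0 assumed, mirroring the _ll_div0 check in the assembly. */
    model_uldiv_t out;
    out.quot = num / den;
    out.rem  = num - out.quot * den;
    return out;
}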