ARM: Use local versions of libgcc functions instead of linking against libgcc.

The flags used to compile libgcc may make it incompatible with the code it's linked against, and/or the hardware it's going to run on. Rather than try to tease the right libgcc from the compiler, lets just leave it out and use our own implementations of the necessary functions. Most of these implementations were taken from the Linux kernel, except for uldivmod.S which was taken from a CL originally written for U-Boot by Che-Liang Chiou in December of 2010. It was modified to not use the CLZ instruction on machines that don't have it, anything earlier than ARMv5. The top block was taken from an earlier version of the same CL which didn't use CLZ in that spot. The later block was written from scratch. BUG=None TEST=Built and booted into the bootblock on nyan. Ran a series of tests which divided and modded a 64 bit value by various 32 bit values which were powers of 2. Confirmed that this function was used and that the returned value was correct. Printed decimal and hex versions of some values and verified that they equaled each other. Built and booted on pit with serial enabled. BRANCH=None Original-Change-Id: I7527e28af411b7aa7f94579be95a6b352a91a224 Original-Signed-off-by: Gabe Black <gabeblack@google.com> Original-Reviewed-on: https://chromium-review.googlesource.com/172401 Original-Reviewed-by: David Hendricks <dhendrix@chromium.org> Original-Commit-Queue: Gabe Black <gabeblack@chromium.org> Original-Tested-by: Gabe Black <gabeblack@chromium.org> (cherry picked from commit be8c7a8f3292a7d7651b7c6dafc9a2c53afbd402) *** This second patch is cherry-picked and squashed again to *** pick up the libgcc changes that were skipped previously. arm: Move libgcc assembly macros to arch/asm.h libgcc/macros.h contains some useful assembly macros that are common in Linux kernel code and facilitate things such as unified ARM/THUMB assembly. This patch moves it to a more general place where it can be used by other code as well. BUG=None TEST=Snow still boots. Original-Change-Id: If68e8930aaafa706c54cf9a156fac826b31bb193 Original-Signed-off-by: Julius Werner <jwerner@chromium.org> Original-Reviewed-on: https://chromium-review.googlesource.com/182178 Original-Reviewed-by: Vincent Palatin <vpalatin@chromium.org> (cherry picked from commit a780670def94a969829811fa8cf257f12b88f085) *** Additional changes for stage specific builds Signed-off-by: Marc Jones <marc.jones@se-eng.com> Change-Id: Ie3e48f34ebf6fbe20c3dd76ecbcbea7844e9466e Reviewed-on: http://review.coreboot.org/7322 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
2013-10-08 18:24:10 -07:00 · 2013-10-08 18:24:10 -07:00 · 169c0df6b8
parent 44bc11c3eb
commit 169c0df6b8
10 changed files with 1206 additions and 8 deletions
--- a/src/arch/arm/Makefile.inc
+++ b/src/arch/arm/Makefile.inc
@ -2,7 +2,7 @@
 ##
 ## This file is part of the coreboot project.
 ##
-## Copyright (C) 2012 The ChromiumOS Authors
+## Copyright (C) 2012-2013 The ChromiumOS Authors
 ## Copyright (C) 2012 Alexandru Gagniuc <mr.nuke.me@gmail.com>
 ## Copyright (C) 2009-2010 coresystems GmbH
 ## Copyright (C) 2009 Ronald G. Minnich
@ -23,6 +23,7 @@
 ###############################################################################
 # Take care of subdirectories
 ###############################################################################
+subdirs-y += libgcc/
 subdirs-y += armv4/ armv7/

 ###############################################################################
@ -58,13 +59,14 @@ bootblock-y += eabi_compat.c
 bootblock-y += memset.S
 bootblock-y += memcpy.S
 bootblock-y += memmove.S
+bootblock-y += div0.c

 $(objcbfs)/bootblock.debug: $(src)/arch/arm/bootblock.ld $(obj)/ldoptions $$(bootblock-objs)
 	@printf "    LINK       $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
 	$(LD_bootblock) -m armelf_linux_eabi --gc-sections -static -o $@ -L$(obj) $< -T $(src)/arch/arm/bootblock.ld
 else
-	$(CC_bootblock) $(CFLAGS_bootblock) -nostartfiles -Wl,--gc-sections -static -o $@ -L$(obj) -T $(src)/arch/arm/bootblock.ld -Wl,--start-group $(bootblock-objs) $(LIBGCC_FILE_NAME_bootblock) -Wl,--end-group
+	$(CC_bootblock) $(CFLAGS_bootblock) -nostartfiles -Wl,--gc-sections -static -o $@ -L$(obj) -T $(src)/arch/arm/bootblock.ld -Wl,--start-group $(bootblock-objs) -Wl,--end-group
 endif

 endif # CONFIG_ARCH_BOOTBLOCK_ARM
@ -89,7 +91,7 @@ $(objcbfs)/romstage.debug: $$(romstage-objs) $(src)/arch/arm/romstage.ld $(obj)/
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
 	$(LD_romstage) -nostdlib -nostartfiles --gc-sections -static -o $@ -L$(obj) $(romstage-objs) -T $(src)/arch/arm/romstage.ld
 else
-	$(CC_romstage) $(CFLAGS_romstage) -nostartfiles -Wl,--gc-sections -static -o $@ -L$(obj) -T $(src)/arch/arm/romstage.ld -Wl,--start-group $(romstage-objs) $(LIBGCC_FILE_NAME_romstage) -Wl,--end-group
+	$(CC_romstage) $(CFLAGS_romstage) -nostartfiles -Wl,--gc-sections -static -o $@ -L$(obj) -T $(src)/arch/arm/romstage.ld -Wl,--start-group $(romstage-objs) -Wl,--end-group
 endif

 endif # CONFIG_ARCH_ROMSTAGE_ARM
@ -111,20 +113,20 @@ ramstage-y += memcpy.S
 ramstage-y += memmove.S
 ramstage-srcs += $(wildcard src/mainboard/$(MAINBOARDDIR)/mainboard.c)

-$(objcbfs)/ramstage.debug: $$(ramstage-objs) $(LIBGCC_FILE_NAME_ramstage) $(src)/arch/arm/ramstage.ld $(obj)/ldoptions
+$(objcbfs)/ramstage.debug: $$(ramstage-objs) $(src)/arch/arm/ramstage.ld $(obj)/ldoptions
 	@printf "    CC         $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
 	$(LD_ramstage) -m armelf_linux_eabi --gc-sections -o $@ -L$(obj) $< -T $(src)/arch/arm/ramstage.ld
 else
-	$(CC_ramstage) $(CFLAGS_ramstage) -nostdlib -Wl,--gc-sections -static -o $@ -L$(obj) -Wl,--start-group $(ramstage-objs) $(LIBGCC_FILE_NAME_ramstage) -Wl,--end-group -T $(src)/arch/arm/ramstage.ld
+	$(CC_ramstage) $(CFLAGS_ramstage) -nostdlib -Wl,--gc-sections -static -o $@ -L$(obj) -Wl,--start-group $(ramstage-objs) -Wl,--end-group -T $(src)/arch/arm/ramstage.ld
 endif

-$(objgenerated)/ramstage.o: $(stages_o) $$(ramstage-objs) $(LIBGCC_FILE_NAME_ramstage)
+$(objgenerated)/ramstage.o: $(stages_o) $$(ramstage-objs)
 	@printf "    CC         $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
-	$(LD_ramstage) -m -m armelf_linux_eabi --gc-sections -r -o $@ --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --wrap __uidiv --start-group $(ramstage-objs) $(LIBGCC_FILE_NAME_ramstage) --end-group
+	$(LD_ramstage) -m -m armelf_linux_eabi --gc-sections -r -o $@ --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --wrap __uidiv --start-group $(ramstage-objs) --end-group
 else
-	$(CC_ramstage) $(CFLAGS_ramstage) $(CPPFLAGS_ramstage) -nostdlib -Wl,--gc-sections -r -o $@ -Wl,--start-group $(stages_o) $(ramstage-objs) $(LIBGCC_FILE_NAME_ramstage) -Wl,--end-group
+	$(CC_ramstage) $(CFLAGS_ramstage) $(CPPFLAGS_ramstage) -nostdlib -Wl,--gc-sections -r -o $@ -Wl,--start-group $(stages_o) $(ramstage-objs) -Wl,--end-group
 endif

 endif # CONFIG_ARCH_RAMSTAGE_ARM
--- a/src/arch/arm/libgcc/Makefile.inc
+++ b/src/arch/arm/libgcc/Makefile.inc
@ -0,0 +1,34 @@
+################################################################################
+##
+## This file is part of the coreboot project.
+##
+## Copyright (C) 2013 The ChromiumOS Authors
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; version 2 of the License.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+##
+################################################################################
+
+libgcc_files = ashldi3.S lib1funcs.S lshrdi3.S muldi3.S ucmpdi2.S uldivmod.S
+
+ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM),y)
+bootblock-y += $(libgcc_files)
+endif
+
+ifeq ($(CONFIG_ARCH_ROMSTAGE_ARM),y)
+romstage-y += $(libgcc_files)
+endif
+
+ifeq ($(CONFIG_ARCH_RAMSTAGE_ARM),y)
+ramstage-y += $(libgcc_files)
+endif
--- a/src/arch/arm/libgcc/ashldi3.S
+++ b/src/arch/arm/libgcc/ashldi3.S
@ -0,0 +1,57 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__ashldi3)
+ENTRY(__aeabi_llsl)
+
+	subs	r3, r2, #32
+	rsb	ip, r2, #32
+	movmi	ah, ah, lsl r2
+	movpl	ah, al, lsl r3
+ ARM(	orrmi	ah, ah, al, lsr ip	)
+ THUMB(	lsrmi	r3, al, ip		)
+ THUMB(	orrmi	ah, ah, r3		)
+	mov	al, al, lsl r2
+	mov	pc, lr
+
+ENDPROC(__ashldi3)
+ENDPROC(__aeabi_llsl)
+
+#endif
--- a/src/arch/arm/libgcc/bpabi.c
+++ b/src/arch/arm/libgcc/bpabi.c
@ -0,0 +1,214 @@
+/* Miscellaneous BPABI functions.
+
+   Copyright (C) 2003, 2004  Free Software Foundation, Inc.
+   Contributed by CodeSourcery, LLC.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   In addition to the permissions in the GNU General Public License, the
+   Free Software Foundation gives you unlimited permission to link the
+   compiled version of this file into combinations with other programs,
+   and to distribute those combinations without any restriction coming
+   from the use of this file.  (The General Public License restrictions
+   do apply in other respects; for example, they cover modification of
+   the file, and distribution when not linked into a combine
+   executable.)
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to
+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#if defined __GNUC__
+
+#include <stdint.h>
+
+uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *rp);
+extern int64_t __divdi3(int64_t, int64_t);
+extern uint64_t __udivdi3(uint64_t, uint64_t);
+extern int64_t __gnu_ldivmod_helper(int64_t, int64_t, int64_t *);
+extern uint64_t __gnu_uldivmod_helper(uint64_t, uint64_t, uint64_t *);
+
+typedef union
+{
+	struct {
+		int32_t low;
+		int32_t high;
+	} s;
+	int64_t ll;
+} DWunion;
+
+uint64_t
+__udivmoddi4(uint64_t n, uint64_t d, uint64_t *rp)
+{
+	const DWunion nn = {.ll = n};
+	const DWunion dd = {.ll = d};
+	DWunion rr;
+	uint32_t d0, d1, n0, n1, n2;
+	uint32_t q0, q1;
+	uint32_t b, bm;
+
+	d0 = dd.s.low;
+	d1 = dd.s.high;
+	n0 = nn.s.low;
+	n1 = nn.s.high;
+
+	if (d1 == 0) {
+		if (d0 > n1) {
+			/* 0q = nn / 0D */
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+			q1 = 0;
+			/* Remainder in n0. */
+		} else {
+			/* qq = NN / 0d */
+			if (d0 == 0)
+				d0 = 1 / d0; /* Divide intentionally by zero. */
+
+			udiv_qrnnd(q1, n1, 0, n1, d0);
+			udiv_qrnnd(q0, n0, n1, n0, d0);
+
+			/* Remainder in n0. */
+		}
+
+		if (rp != 0) {
+			rr.s.low = n0;
+			rr.s.high = 0;
+			*rp = rr.ll;
+		}
+	} else {
+		if (d1 > n1) {
+			/* 00 = nn / DD */
+			q0 = 0;
+			q1 = 0;
+
+			/* Remainder in n1n0. */
+			if (rp != 0) {
+				rr.s.low = n0;
+				rr.s.high = n1;
+				*rp = rr.ll;
+			}
+		} else {
+			/* 0q = NN / dd */
+
+			count_leading_zeros(bm, d1);
+			if (bm == 0) {
+				/* From (n1 >= d1) /\ (the most significant
+				   bit of d1 is set), conclude (the most
+				   significant bit of n1 is set) /\ (the
+				   quotient digit q0 = 0 or 1).
+
+				   This special case is necessary, not an
+				   optimization. */
+
+				/* The condition on the next line takes
+				   advantage of that n1 >= d1 (true due to
+				   program flow). */
+				if (n1 > d1 || n0 >= d0) {
+					q0 = 1;
+					sub_ddmmss(n1, n0, n1, n0, d1, d0);
+				} else
+					q0 = 0;
+
+				q1 = 0;
+
+				if (rp != 0) {
+					rr.s.low = n0;
+					rr.s.high = n1;
+					*rp = rr.ll;
+				}
+			} else {
+				uint32_t m1, m0;
+				/* Normalize.  */
+
+				b = 32 - bm;
+
+				d1 = (d1 << bm) | (d0 >> b);
+				d0 = d0 << bm;
+				n2 = n1 >> b;
+				n1 = (n1 << bm) | (n0 >> b);
+				n0 = n0 << bm;
+
+				udiv_qrnnd(q0, n1, n2, n1, d1);
+				umul_ppmm(m1, m0, q0, d0);
+
+				if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+					q0--;
+					sub_ddmmss(m1, m0, m1, m0, d1, d0);
+				}
+
+				q1 = 0;
+
+				/* Remainder in (n1n0 - m1m0) >> bm. */
+				if (rp != 0) {
+					sub_ddmmss(n1, n0, n1, n0, m1, m0);
+					rr.s.low = (n1 << b) | (n0 >> bm);
+					rr.s.high = n1 >> bm;
+					*rp = rr.ll;
+				}
+			}
+		}
+	}
+
+	const DWunion ww = {{.low = q0, .high = q1}};
+	return ww.ll;
+}
+
+int64_t
+__divdi3(int64_t u, int64_t v)
+{
+	int32_t c = 0;
+	DWunion uu = {.ll = u};
+	DWunion vv = {.ll = v};
+	int64_t w;
+
+	if (uu.s.high < 0) {
+		c = ~c;
+		uu.ll = -uu.ll;
+	}
+	if (vv.s.high < 0) {
+		c = ~c;
+		vv.ll = -vv.ll;
+	}
+
+	w = __udivmoddi4(uu.ll, vv.ll, (uint64_t *)0);
+	if (c)
+		w = -w;
+
+	return w;
+}
+
+int64_t
+__gnu_ldivmod_helper (int64_t a, int64_t b, int64_t *remainder)
+{
+	int64_t quotient;
+
+	quotient = __divdi3(a, b);
+	*remainder = a - b * quotient;
+	return quotient;
+}
+
+uint64_t
+__udivdi3(uint64_t n, uint64_t d)
+{
+	return __udivmoddi4(n, d, (uint64_t *)0);
+}
+
+uint64_t
+__gnu_uldivmod_helper(uint64_t a, uint64_t b, uint64_t *remainder)
+{
+	uint64_t quotient;
+
+	quotient = __udivdi3(a, b);
+	*remainder = a - b * quotient;
+	return quotient;
+}
+
+#endif
--- a/src/arch/arm/libgcc/bpabi_asm.S
+++ b/src/arch/arm/libgcc/bpabi_asm.S
@ -0,0 +1,130 @@
+/* Miscellaneous BPABI functions.
+
+   Copyright (C) 2003, 2004, 2007, 2008  Free Software Foundation, Inc.
+   Contributed by CodeSourcery, LLC.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   In addition to the permissions in the GNU General Public License, the
+   Free Software Foundation gives you unlimited permission to link the
+   compiled version of this file into combinations with other programs,
+   and to distribute those combinations without any restriction coming
+   from the use of this file.  (The General Public License restrictions
+   do apply in other respects; for example, they cover modification of
+   the file, and distribution when not linked into a combine
+   executable.)
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to
+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+
+#include <arch/asm.h>
+
+#ifdef __ARMEB__
+#define xxh r0
+#define xxl r1
+#define yyh r2
+#define yyl r3
+#else
+#define xxh r1
+#define xxl r0
+#define yyh r3
+#define yyl r2
+#endif
+
+#if defined __thumb2__
+
+.macro do_it cond
+	it	\cond
+.endm
+#define do_push push
+#define do_pop pop
+
+#else
+
+.macro do_it cond
+.endm
+#define do_push stmfd sp!,
+#define do_pop ldmfd sp!,
+
+#endif
+
+ENTRY(__aeabi_lcmp)
+
+	cmp	xxh, yyh
+	do_it	lt
+	movlt	r0, #-1
+	do_it	gt
+	movgt	r0, #1
+	do_it	ne
+	movne	pc, lr
+	subs	r0, xxl, yyl
+	do_it	lo
+	movlo	r0, #-1
+	do_it	hi
+	movhi	r0, #1
+	mov	pc, lr
+
+ENDPROC(__aeabi_lcmp)
+
+ENTRY(__aeabi_ulcmp)
+
+	cmp	xxh, yyh
+	do_it	lo
+	movlo	r0, #-1
+	do_it	hi
+	movhi	r0, #1
+	do_it	ne
+	movne	pc, lr
+	cmp	xxl, yyl
+	do_it	lo
+	movlo	r0, #-1
+	do_it	hi
+	movhi	r0, #1
+	do_it	eq
+	moveq	r0, #0
+	mov	pc, lr
+
+ENDPROC(__aeabi_ulcmp)
+
+ENTRY(__aeabi_ldivmod)
+	sub sp, sp, #8
+#if defined(__thumb2__)
+	mov ip, sp
+	push {ip, lr}
+#else
+	do_push {sp, lr}
+#endif
+	bl __gnu_ldivmod_helper
+	ldr lr, [sp, #4]
+	add sp, sp, #8
+	do_pop {r2, r3}
+	mov	pc, lr
+
+ENDPROC(__aeabi_ldivmod)
+
+ENTRY(__aeabi_uldivmod)
+	sub sp, sp, #8
+#if defined(__thumb2__)
+	mov ip, sp
+	push {ip, lr}
+#else
+	do_push {sp, lr}
+#endif
+	bl __gnu_uldivmod_helper
+	ldr lr, [sp, #4]
+	add sp, sp, #8
+	do_pop {r2, r3}
+	mov	pc, lr
+
+ENDPROC(__aeabi_uldivmod)
--- a/src/arch/arm/libgcc/lib1funcs.S
+++ b/src/arch/arm/libgcc/lib1funcs.S
@ -0,0 +1,344 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@fluxnic.net>
+ *   - contributed to gcc-3.4 on Sep 30, 2003
+ *   - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+	clz	\curbit, \divisor
+	clz	\result, \dividend
+	sub	\result, \curbit, \result
+	mov	\curbit, #1
+	mov	\divisor, \divisor, lsl \result
+	mov	\curbit, \curbit, lsl \result
+	mov	\result, #0
+
+#else
+
+	@ Initially shift the divisor left 3 bits if possible,
+	@ set curbit accordingly.  This allows for curbit to be located
+	@ at the left end of each 4 bit nibbles in the division loop
+	@ to save one loop in most cases.
+	tst	\divisor, #0xe0000000
+	moveq	\divisor, \divisor, lsl #3
+	moveq	\curbit, #8
+	movne	\curbit, #1
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	movlo	\curbit, \curbit, lsl #4
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	movlo	\curbit, \curbit, lsl #1
+	blo	1b
+
+	mov	\result, #0
+
+#endif
+
+	@ Division loop
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	orrhs	\result,   \result,   \curbit
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	orrhs	\result,   \result,   \curbit,  lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	orrhs	\result,   \result,   \curbit,  lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	orrhs	\result,   \result,   \curbit,  lsr #3
+	cmp	\dividend, #0			@ Early termination?
+	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
+	movne	\divisor,  \divisor, lsr #4
+	bne	1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	rsb	\order, \order, #31
+
+#else
+
+	cmp	\divisor, #(1 << 16)
+	movhs	\divisor, \divisor, lsr #16
+	movhs	\order, #16
+	movlo	\order, #0
+
+	cmp	\divisor, #(1 << 8)
+	movhs	\divisor, \divisor, lsr #8
+	addhs	\order, \order, #8
+
+	cmp	\divisor, #(1 << 4)
+	movhs	\divisor, \divisor, lsr #4
+	addhs	\order, \order, #4
+
+	cmp	\divisor, #(1 << 2)
+	addhi	\order, \order, #3
+	addls	\order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	clz	\spare, \dividend
+	sub	\order, \order, \spare
+	mov	\divisor, \divisor, lsl \order
+
+#else
+
+	mov	\order, #0
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	addlo	\order, \order, #4
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	addlo	\order, \order, #1
+	blo	1b
+
+#endif
+
+	@ Perform all needed substractions to keep only the reminder.
+	@ Do comparisons in batch of 4 first.
+	subs	\order, \order, #3		@ yes, 3 is intended here
+	blt	2f
+
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	cmp	\dividend, #1
+	mov	\divisor, \divisor, lsr #4
+	subges	\order, \order, #4
+	bge	1b
+
+	tst	\order, #3
+	teqne	\dividend, #0
+	beq	5f
+
+	@ Either 1, 2 or 3 comparison/substractions are left.
+2:	cmn	\order, #2
+	blt	4f
+	beq	3f
+	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+3:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+4:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+
+	subs	r2, r1, #1
+	moveq	pc, lr
+	bcc	Ldiv0
+	cmp	r0, r1
+	bls	11f
+	tst	r1, r2
+	beq	12f
+
+	ARM_DIV_BODY r0, r1, r2, r3
+
+	mov	r0, r2
+	mov	pc, lr
+
+11:	moveq	r0, #1
+	movne	r0, #0
+	mov	pc, lr
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	mov	r0, r0, lsr r2
+	mov	pc, lr
+
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+
+	subs	r2, r1, #1			@ compare divisor with 1
+	bcc	Ldiv0
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq   r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	movls	pc, lr
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+	mov	pc, lr
+
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+
+	cmp	r1, #0
+	eor	ip, r0, r1			@ save the sign of the result.
+	beq	Ldiv0
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	subs	r2, r1, #1			@ division by 1 or -1 ?
+	beq	10f
+	movs	r3, r0
+	rsbmi	r3, r0, #0			@ positive dividend value
+	cmp	r3, r1
+	bls	11f
+	tst	r1, r2				@ divisor is power of 2 ?
+	beq	12f
+
+	ARM_DIV_BODY r3, r1, r0, r2
+
+	cmp	ip, #0
+	rsbmi	r0, r0, #0
+	mov	pc, lr
+
+10:	teq	ip, r0				@ same sign ?
+	rsbmi	r0, r0, #0
+	mov	pc, lr
+
+11:	movlo	r0, #0
+	moveq	r0, ip, asr #31
+	orreq	r0, r0, #1
+	mov	pc, lr
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	cmp	ip, #0
+	mov	r0, r3, lsr r2
+	rsbmi	r0, r0, #0
+	mov	pc, lr
+
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+
+	cmp	r1, #0
+	beq	Ldiv0
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	movs	ip, r0				@ preserve sign of dividend
+	rsbmi	r0, r0, #0			@ if negative make positive
+	subs	r2, r1, #1			@ compare divisor with 1
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq	r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	bls	10f
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+10:	cmp	ip, #0
+	rsbmi	r0, r0, #0
+	mov	pc, lr
+
+ENDPROC(__modsi3)
+
+ENTRY(__aeabi_uidivmod)
+
+	stmfd	sp!, {r0, r1, ip, lr}
+	bl	__aeabi_uidiv
+	ldmfd	sp!, {r1, r2, ip, lr}
+	mul	r3, r0, r2
+	sub	r1, r1, r3
+	mov	pc, lr
+
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+	stmfd	sp!, {r0, r1, ip, lr}
+	bl	__aeabi_idiv
+	ldmfd	sp!, {r1, r2, ip, lr}
+	mul	r3, r0, r2
+	sub	r1, r1, r3
+	mov	pc, lr
+
+ENDPROC(__aeabi_idivmod)
+
+
+Ldiv0:
+	str	lr, [sp, #-8]!
+	bl	__div0
+	mov	r0, #0			@ About as wrong as it could be.
+	ldr	pc, [sp], #8
+ENDPROC(Ldiv0)
+
+#endif
--- a/src/arch/arm/libgcc/lshrdi3.S
+++ b/src/arch/arm/libgcc/lshrdi3.S
@ -0,0 +1,57 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__lshrdi3)
+ENTRY(__aeabi_llsr)
+
+	subs	r3, r2, #32
+	rsb	ip, r2, #32
+	movmi	al, al, lsr r2
+	movpl	al, ah, lsr r3
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip		)
+ THUMB(	orrmi	al, al, r3		)
+	mov	ah, ah, lsr r2
+	mov	pc, lr
+
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
+
+#endif
--- a/src/arch/arm/libgcc/muldi3.S
+++ b/src/arch/arm/libgcc/muldi3.S
@ -0,0 +1,51 @@
+/*
+ *  linux/arch/arm/lib/muldi3.S
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    Oct 19, 2005
+ *  Copyright:  Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__muldi3)
+ENTRY(__aeabi_lmul)
+
+	mul	xh, yl, xh
+	mla	xh, xl, yh, xh
+	mov	ip, xl, lsr #16
+	mov	yh, yl, lsr #16
+	bic	xl, xl, ip, lsl #16
+	bic	yl, yl, yh, lsl #16
+	mla	xh, yh, ip, xh
+	mul	yh, xl, yh
+	mul	xl, yl, xl
+	mul	ip, yl, ip
+	adds	xl, xl, yh, lsl #16
+	adc	xh, xh, yh, lsr #16
+	adds	xl, xl, ip, lsl #16
+	adc	xh, xh, ip, lsr #16
+	mov	pc, lr
+
+ENDPROC(__muldi3)
+ENDPROC(__aeabi_lmul)
+
+#endif
--- a/src/arch/arm/libgcc/ucmpdi2.S
+++ b/src/arch/arm/libgcc/ucmpdi2.S
@ -0,0 +1,51 @@
+/*
+ *  linux/arch/arm/lib/ucmpdi2.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Oct 19, 2005
+ *  Copyright:	Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__ucmpdi2)
+
+	cmp	xh, yh
+	cmpeq	xl, yl
+	movlo	r0, #0
+	moveq	r0, #1
+	movhi	r0, #2
+	mov	pc, lr
+
+ENDPROC(__ucmpdi2)
+
+ENTRY(__aeabi_ulcmp)
+
+	cmp	xh, yh
+	cmpeq	xl, yl
+	movlo	r0, #-1
+	moveq	r0, #0
+	movhi	r0, #1
+	mov	pc, lr
+
+ENDPROC(__aeabi_ulcmp)
+
+#endif
--- a/src/arch/arm/libgcc/uldivmod.S
+++ b/src/arch/arm/libgcc/uldivmod.S
@ -0,0 +1,258 @@
+/*
+ * Copyright 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ */
+
+#include <arch/asm.h>
+
+/*
+ * A, Q = r0 + (r1 << 32)
+ * B, R = r2 + (r3 << 32)
+ * A / B = Q ... R
+ */
+
+A_0	.req	r0
+A_1	.req	r1
+B_0	.req	r2
+B_1	.req	r3
+C_0	.req	r4
+C_1	.req	r5
+D_0	.req	r6
+D_1	.req	r7
+
+Q_0	.req	r0
+Q_1	.req	r1
+R_0	.req	r2
+R_1	.req	r3
+
+ENTRY(__aeabi_uldivmod)
+	stmfd	sp!, {r4, r5, r6, r7, lr}
+	@ Test if B == 0
+	orrs	ip, B_0, B_1		@ Z set -> B == 0
+	beq	L_div_by_0
+	@ Test if B is power of 2: (B & (B - 1)) == 0
+	subs	C_0, B_0, #1
+	sbc	C_1, B_1, #0
+	tst	C_0, B_0
+	tsteq	B_1, C_1
+	beq	L_pow2
+	@ Test if A_1 == B_1 == 0
+	orrs	ip, A_1, B_1
+	beq	L_div_32_32
+
+L_div_64_64:
+/* CLZ only exists in ARM architecture version 5 and above. */
+#if __COREBOOT_ARM_ARCH__ >= 5
+	mov	C_0, #1
+	mov	C_1, #0
+	@ D_0 = clz A
+	teq	A_1, #0
+	clz	D_0, A_1
+	clzeq	ip, A_0
+	addeq	D_0, D_0, ip
+	@ D_1 = clz B
+	teq	B_1, #0
+	clz	D_1, B_1
+	clzeq	ip, B_0
+	addeq	D_1, D_1, ip
+	@ if clz B - clz A > 0
+	subs	D_0, D_1, D_0
+	bls	L_done_shift
+	@ B <<= (clz B - clz A)
+	subs	D_1, D_0, #32
+	rsb	ip, D_0, #32
+	movmi	B_1, B_1, lsl D_0
+	orrmi	B_1, B_1, B_0, lsr ip
+	movpl	B_1, B_0, lsl D_1
+	mov	B_0, B_0, lsl D_0
+	@ C = 1 << (clz B - clz A)
+	movmi	C_1, C_1, lsl D_0
+	orrmi	C_1, C_1, C_0, lsr ip
+	movpl	C_1, C_0, lsl D_1
+	mov	C_0, C_0, lsl D_0
+L_done_shift:
+	mov	D_0, #0
+	mov	D_1, #0
+	@ C: current bit; D: result
+#else
+	@ C: current bit; D: result
+	mov	C_0, #1
+	mov	C_1, #0
+	mov	D_0, #0
+	mov	D_1, #0
+L_lsl_4:
+	cmp	B_1, #0x10000000
+	cmpcc	B_1, A_1
+	cmpeq	B_0, A_0
+	bcs	L_lsl_1
+	@ B <<= 4
+	mov	B_1, B_1, lsl #4
+	orr	B_1, B_1, B_0, lsr #28
+	mov	B_0, B_0, lsl #4
+	@ C <<= 4
+	mov	C_1, C_1, lsl #4
+	orr	C_1, C_1, C_0, lsr #28
+	mov	C_0, C_0, lsl #4
+	b	L_lsl_4
+L_lsl_1:
+	cmp	B_1, #0x80000000
+	cmpcc	B_1, A_1
+	cmpeq	B_0, A_0
+	bcs	L_subtract
+	@ B <<= 1
+	mov	B_1, B_1, lsl #1
+	orr	B_1, B_1, B_0, lsr #31
+	mov	B_0, B_0, lsl #1
+	@ C <<= 1
+	mov	C_1, C_1, lsl #1
+	orr	C_1, C_1, C_0, lsr #31
+	mov	C_0, C_0, lsl #1
+	b	L_lsl_1
+#endif
+L_subtract:
+	@ if A >= B
+	cmp	A_1, B_1
+	cmpeq	A_0, B_0
+	bcc	L_update
+	@ A -= B
+	subs	A_0, A_0, B_0
+	sbc	A_1, A_1, B_1
+	@ D |= C
+	orr	D_0, D_0, C_0
+	orr	D_1, D_1, C_1
+L_update:
+	@ if A == 0: break
+	orrs	ip, A_1, A_0
+	beq	L_exit
+	@ C >>= 1
+	movs	C_1, C_1, lsr #1
+	movs	C_0, C_0, rrx
+	@ if C == 0: break
+	orrs	ip, C_1, C_0
+	beq	L_exit
+	@ B >>= 1
+	movs	B_1, B_1, lsr #1
+	mov	B_0, B_0, rrx
+	b	L_subtract
+L_exit:
+	@ Note: A, B & Q, R are aliases
+	mov	R_0, A_0
+	mov	R_1, A_1
+	mov	Q_0, D_0
+	mov	Q_1, D_1
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+L_div_32_32:
+	@ Note:	A_0 &	r0 are aliases
+	@	Q_1	r1
+	mov	r1, B_0
+	bl	__aeabi_uidivmod
+	mov	R_0, r1
+	mov	R_1, #0
+	mov	Q_1, #0
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+L_pow2:
+/* CLZ only exists in ARM architecture version 5 and above. */
+#if __COREBOOT_ARM_ARCH__ >= 5
+	@ Note: A, B and Q, R are aliases
+	@ R = A & (B - 1)
+	and	C_0, A_0, C_0
+	and	C_1, A_1, C_1
+	@ Q = A >> log2(B)
+	@ Note: B must not be 0 here!
+	clz	D_0, B_0
+	add	D_1, D_0, #1
+	rsbs	D_0, D_0, #31
+	bpl	L_1
+	clz	D_0, B_1
+	rsb	D_0, D_0, #31
+	mov	A_0, A_1, lsr D_0
+	add	D_0, D_0, #32
+L_1:
+	movpl	A_0, A_0, lsr D_0
+	orrpl	A_0, A_0, A_1, lsl D_1
+	mov	A_1, A_1, lsr D_0
+	@ Mov back C to R
+	mov	R_0, C_0
+	mov	R_1, C_1
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+#else
+	@ Note: A, B and Q, R are aliases
+	@ R = A & (B - 1)
+	and	C_0, A_0, C_0
+	and	C_1, A_1, C_1
+	@ Q = A >> log2(B)
+	@ Note: B must not be 0 here!
+	@ Count the leading zeroes in B.
+	mov	D_0, #0
+	orrs	B_0, B_0, B_0
+	@ If B is greater than 1 << 31, divide A and B by 1 << 32.
+	moveq	A_0, A_1
+	moveq	A_1, #0
+	moveq	B_0, B_1
+	@ Count the remaining leading zeroes in B.
+	movs	B_1, B_0, lsl #16
+	addeq	D_0, #16
+	moveq	B_0, B_0, lsr #16
+	tst	B_0, #0xff
+	addeq	D_0, #8
+	moveq	B_0, B_0, lsr #8
+	tst	B_0, #0xf
+	addeq	D_0, #4
+	moveq	B_0, B_0, lsr #4
+	tst	B_0, #0x3
+	addeq	D_0, #2
+	moveq	B_0, B_0, lsr #2
+	tst	B_0, #0x1
+	addeq	D_0, #1
+	@ Shift A to the right by the appropriate amount.
+	rsb	D_1, D_0, #32
+	mov	Q_0, A_0, lsr D_0
+	orr	Q_0, A_1, lsl D_1
+	mov	Q_1, A_1, lsr D_0
+	@ Move C to R
+	mov	R_0, C_0
+	mov	R_1, C_1
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+#endif
+
+L_div_by_0:
+	bl	__div0
+	@ As wrong as it could be
+	mov	Q_0, #0
+	mov	Q_1, #0
+	mov	R_0, #0
+	mov	R_1, #0
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+ENDPROC(__aeabi_uldivmod)