Eliminate do_div().
This eliminates the use of do_div() in favor of the 64-bit division helpers provided by libgcc.

This was tested by building and booting on Google Snow (ARMv7) and QEMU (x86). printk()s which use division in vtxprintf() look good.

Change-Id: Icad001d84a3c05bfbf77098f3d644816280b4a4d
Signed-off-by: Gabe Black <gabeblack@chromium.org>
Signed-off-by: David Hendricks <dhendrix@chromium.org>
Reviewed-on: http://review.coreboot.org/2606
Tested-by: build bot (Jenkins)
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
Reviewed-by: Ronald G. Minnich <rminnich@gmail.com>
commit ae0e8d3613 (parent 31c5e07a04)
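Note: the headers removed below document do_div()'s contract: divide a 64-bit dividend in place by a 32-bit divisor and hand back the 32-bit remainder. The following is an illustrative sketch only, not code from this commit; it shows that contract written as the plain C that call sites now use, where the 64-bit '%' and '/' are what the compiler lowers to libgcc helpers (for example __umoddi3/__udivdi3, or __aeabi_uldivmod on ARM EABI, depending on the target):

#include <stdint.h>

/* Illustrative sketch, not code from this commit: the contract the removed
 * do_div() macros implemented, expressed with ordinary 64-bit division that
 * the compiler turns into calls to libgcc helpers. */
static uint32_t do_div_equivalent(uint64_t *n, uint32_t base)
{
        uint32_t remainder = *n % base;  /* lowered to a 64-bit modulo helper */
        *n = *n / base;                  /* lowered to a 64-bit divide helper */
        return remainder;
}

At a call site, `digit = do_div(num, 10);` therefore simply becomes `digit = num % 10; num /= 10;`, as the vtxprintf() hunk further down shows.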
@@ -124,7 +124,7 @@ endif
 $(objgenerated)/coreboot_ram.o: $(stages_o) $$(ramstage-objs) $(LIBGCC_FILE_NAME)
        @printf " CC $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
-       $(LD) -m -m armelf_linux_eabi -r -o $@ --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --wrap __uidiv --wrap __do_div64 --start-group $(ramstage-objs) $(LIBGCC_FILE_NAME) --end-group
+       $(LD) -m -m armelf_linux_eabi -r -o $@ --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --wrap __uidiv --start-group $(ramstage-objs) $(LIBGCC_FILE_NAME) --end-group
 else
        $(CC) $(CFLAGS) -nostdlib -r -o $@ -Wl,--start-group $(stages_o) $(ramstage-objs) $(LIBGCC_FILE_NAME) -Wl,--end-group
 endif
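Note: the `--wrap` options kept on this link line use GNU ld's symbol wrapping: once a symbol is wrapped, every undefined reference to it resolves to `__wrap_<symbol>`, while `__real_<symbol>` still reaches the original definition (presumably the wrappers are supplied by the gcc.c this change adds to the romstage and SMM builds). A minimal, hypothetical illustration of the mechanism, not code from this commit:

/* Hypothetical illustration of ld's --wrap mechanism; not part of this commit. */
long long __real___divdi3(long long a, long long b);

/* With "--wrap __divdi3" on the link line, calls to __divdi3 land here. */
long long __wrap___divdi3(long long a, long long b)
{
        return __real___divdi3(a, b);   /* forward to the real libgcc routine */
}

The commit drops `--wrap __do_div64` here because the __do_div64 helper no longer exists after the deletions below.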
@@ -1,233 +0,0 @@
-/* taken from linux 2.6.31.14 */
-
-#ifndef __ASM_ARM_DIV64
-#define __ASM_ARM_DIV64
-
-//#include <asm/system.h>
-//#include <linux/types.h>
-// FIXME
-
-#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
-#define __LINUX_ARM_ARCH__ 7
-
-/*
- * The semantics of do_div() are:
- *
- * uint32_t do_div(uint64_t *n, uint32_t base)
- * {
- *     uint32_t remainder = *n % base;
- *     *n = *n / base;
- *     return remainder;
- * }
- *
- * In other words, a 64-bit dividend with a 32-bit divisor producing
- * a 64-bit result and a 32-bit remainder. To accomplish this optimally
- * we call a special __do_div64 helper with completely non standard
- * calling convention for arguments and results (beware).
- */
-
-#ifdef __ARMEB__
-#define __xh "r0"
-#define __xl "r1"
-#else
-#define __xl "r0"
-#define __xh "r1"
-#endif
-
-#define __do_div_asm(n, base) \
-({ \
-        register unsigned int __base asm("r4") = base; \
-        register unsigned long long __n asm("r0") = n; \
-        register unsigned long long __res asm("r2"); \
-        register unsigned int __rem asm(__xh); \
-        asm( __asmeq("%0", __xh) \
-             __asmeq("%1", "r2") \
-             __asmeq("%2", "r0") \
-             __asmeq("%3", "r4") \
-             "bl __do_div64" \
-             : "=r" (__rem), "=r" (__res) \
-             : "r" (__n), "r" (__base) \
-             : "ip", "lr", "cc"); \
-        n = __res; \
-        __rem; \
-})
-
-#if __GNUC__ < 4
-
-/*
- * gcc versions earlier than 4.0 are simply too problematic for the
- * optimized implementation below. First there is gcc PR 15089 that
- * tend to trig on more complex constructs, spurious .global __udivsi3
- * are inserted even if none of those symbols are referenced in the
- * generated code, and those gcc versions are not able to do constant
- * propagation on long long values anyway.
- */
-#define do_div(n, base) __do_div_asm(n, base)
-
-#elif __GNUC__ >= 4
-
-//#include <asm/bug.h>
-
-/*
- * If the divisor happens to be constant, we determine the appropriate
- * inverse at compile time to turn the division into a few inline
- * multiplications instead which is much faster. And yet only if compiling
- * for ARMv4 or higher (we need umull/umlal) and if the gcc version is
- * sufficiently recent to perform proper long long constant propagation.
- * (It is unfortunate that gcc doesn't perform all this internally.)
- */
-#define do_div(n, base) \
-({ \
-        unsigned int __r, __b = (base); \
-        if (!__builtin_constant_p(__b) || __b == 0 || \
-            (__LINUX_ARM_ARCH__ < 4 && (__b & (__b - 1)) != 0)) { \
-                /* non-constant divisor (or zero): slow path */ \
-                __r = __do_div_asm(n, __b); \
-        } else if ((__b & (__b - 1)) == 0) { \
-                /* Trivial: __b is constant and a power of 2 */ \
-                /* gcc does the right thing with this code. */ \
-                __r = n; \
-                __r &= (__b - 1); \
-                n /= __b; \
-        } else { \
-                /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \
-                /* We rely on the fact that most of this code gets */ \
-                /* optimized away at compile time due to constant */ \
-                /* propagation and only a couple inline assembly */ \
-                /* instructions should remain. Better avoid any */ \
-                /* code construct that might prevent that. */ \
-                unsigned long long __res, __x, __t, __m, __n = n; \
-                unsigned int __c, __p, __z = 0; \
-                /* preserve low part of n for reminder computation */ \
-                __r = __n; \
-                /* determine number of bits to represent __b */ \
-                __p = 1 << __div64_fls(__b); \
-                /* compute __m = ((__p << 64) + __b - 1) / __b */ \
-                __m = (~0ULL / __b) * __p; \
-                __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \
-                /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \
-                __x = ~0ULL / __b * __b - 1; \
-                __res = (__m & 0xffffffff) * (__x & 0xffffffff); \
-                __res >>= 32; \
-                __res += (__m & 0xffffffff) * (__x >> 32); \
-                __t = __res; \
-                __res += (__x & 0xffffffff) * (__m >> 32); \
-                __t = (__res < __t) ? (1ULL << 32) : 0; \
-                __res = (__res >> 32) + __t; \
-                __res += (__m >> 32) * (__x >> 32); \
-                __res /= __p; \
-                /* Now sanitize and optimize what we've got. */ \
-                if (~0ULL % (__b / (__b & -__b)) == 0) { \
-                        /* those cases can be simplified with: */ \
-                        __n /= (__b & -__b); \
-                        __m = ~0ULL / (__b / (__b & -__b)); \
-                        __p = 1; \
-                        __c = 1; \
-                } else if (__res != __x / __b) { \
-                        /* We can't get away without a correction */ \
-                        /* to compensate for bit truncation errors. */ \
-                        /* To avoid it we'd need an additional bit */ \
-                        /* to represent __m which would overflow it. */ \
-                        /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \
-                        __c = 1; \
-                        /* Compute __m = (__p << 64) / __b */ \
-                        __m = (~0ULL / __b) * __p; \
-                        __m += ((~0ULL % __b + 1) * __p) / __b; \
-                } else { \
-                        /* Reduce __m/__p, and try to clear bit 31 */ \
-                        /* of __m when possible otherwise that'll */ \
-                        /* need extra overflow handling later. */ \
-                        unsigned int __bits = -(__m & -__m); \
-                        __bits |= __m >> 32; \
-                        __bits = (~__bits) << 1; \
-                        /* If __bits == 0 then setting bit 31 is */ \
-                        /* unavoidable. Simply apply the maximum */ \
-                        /* possible reduction in that case. */ \
-                        /* Otherwise the MSB of __bits indicates the */ \
-                        /* best reduction we should apply. */ \
-                        if (!__bits) { \
-                                __p /= (__m & -__m); \
-                                __m /= (__m & -__m); \
-                        } else { \
-                                __p >>= __div64_fls(__bits); \
-                                __m >>= __div64_fls(__bits); \
-                        } \
-                        /* No correction needed. */ \
-                        __c = 0; \
-                } \
-                /* Now we have a combination of 2 conditions: */ \
-                /* 1) whether or not we need a correction (__c), and */ \
-                /* 2) whether or not there might be an overflow in */ \
-                /* the cross product (__m & ((1<<63) | (1<<31))) */ \
-                /* Select the best insn combination to perform the */ \
-                /* actual __m * __n / (__p << 64) operation. */ \
-                if (!__c) { \
-                        asm ( "umull %Q0, %R0, %1, %Q2\n\t" \
-                              "mov %Q0, #0" \
-                              : "=&r" (__res) \
-                              : "r" (__m), "r" (__n) \
-                              : "cc" ); \
-                } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
-                        __res = __m; \
-                        asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \
-                              "mov %Q0, #0" \
-                              : "+&r" (__res) \
-                              : "r" (__m), "r" (__n) \
-                              : "cc" ); \
-                } else { \
-                        asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \
-                              "cmn %Q0, %Q1\n\t" \
-                              "adcs %R0, %R0, %R1\n\t" \
-                              "adc %Q0, %3, #0" \
-                              : "=&r" (__res) \
-                              : "r" (__m), "r" (__n), "r" (__z) \
-                              : "cc" ); \
-                } \
-                if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
-                        asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \
-                              "umlal %R0, %Q0, %Q1, %R2\n\t" \
-                              "mov %R0, #0\n\t" \
-                              "umlal %Q0, %R0, %R1, %R2" \
-                              : "+&r" (__res) \
-                              : "r" (__m), "r" (__n) \
-                              : "cc" ); \
-                } else { \
-                        asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \
-                              "umlal %R0, %1, %Q2, %R3\n\t" \
-                              "mov %R0, #0\n\t" \
-                              "adds %Q0, %1, %Q0\n\t" \
-                              "adc %R0, %R0, #0\n\t" \
-                              "umlal %Q0, %R0, %R2, %R3" \
-                              : "+&r" (__res), "+&r" (__z) \
-                              : "r" (__m), "r" (__n) \
-                              : "cc" ); \
-                } \
-                __res /= __p; \
-                /* The reminder can be computed with 32-bit regs */ \
-                /* only, and gcc is good at that. */ \
-                { \
-                        unsigned int __res0 = __res; \
-                        unsigned int __b0 = __b; \
-                        __r -= __res0 * __b0; \
-                } \
-                /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \
-                n = __res; \
-        } \
-        __r; \
-})
-
-/* our own fls implementation to make sure constant propagation is fine */
-#define __div64_fls(bits) \
-({ \
-        unsigned int __left = (bits), __nr = 0; \
-        if (__left & 0xffff0000) __nr += 16, __left >>= 16; \
-        if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \
-        if (__left & 0x000000f0) __nr += 4, __left >>= 4; \
-        if (__left & 0x0000000c) __nr += 2, __left >>= 2; \
-        if (__left & 0x00000002) __nr += 1; \
-        __nr; \
-})
-
-#endif
-
-#endif
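Note: the comments in the header deleted above describe turning division by a compile-time constant into multiplication by a precomputed reciprocal (n/b = n*(p/b)/p). As a hedged illustration of that idea only, not code from this commit, here is the classic 32-bit form for the constant divisor 10, using m = ceil(2^35 / 10) = 0xCCCCCCCD; the removed macro did the analogous thing for 64-bit dividends with umull/umlal:

#include <stdint.h>

/* Illustration only: reciprocal multiplication for a constant divisor.
 * For every 32-bit n, n / 10 == (n * 0xCCCCCCCD) >> 35. */
static uint32_t div10(uint32_t n)
{
        return (uint32_t)(((uint64_t)n * 0xCCCCCCCDu) >> 35);
}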
@@ -6,12 +6,10 @@ bootblock-y += cache-cp15.c
 romstage-y += cache_v7.c
 romstage-y += cache-cp15.c
 romstage-y += div0.c
-romstage-y += div64.S
 romstage-y += syslib.c
 romstage-$(CONFIG_EARLY_CONSOLE) += early_console.c
 
 ramstage-y += div0.c
-ramstage-y += div64.S
 #ramstage-y += interrupts.c
 #ramstage-y += memcpy.S
 #ramstage-y += memset.S
@@ -1,208 +0,0 @@
-/*
- * linux/arch/arm/lib/div64.S
- *
- * Optimized computation of 64-bit dividend / 32-bit divisor
- *
- * Author:    Nicolas Pitre
- * Created:   Oct 5, 2003
- * Copyright: Monta Vista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-// FIXME
-//#include <linux/linkage.h>
-#define __LINUX_ARM_ARCH__ 7
-
-#ifdef __ARMEB__
-#define xh r0
-#define xl r1
-#define yh r2
-#define yl r3
-#else
-#define xl r0
-#define xh r1
-#define yl r2
-#define yh r3
-#endif
-
-/*
- * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
- *
- * Note: Calling convention is totally non standard for optimal code.
- *       This is meant to be used by do_div() from include/asm/div64.h only.
- *
- * Input parameters:
- *     xh-xl = dividend (clobbered)
- *     r4    = divisor (preserved)
- *
- * Output values:
- *     yh-yl = result
- *     xh    = remainder
- *
- * Clobbered regs: xl, ip
- */
-
-
-.globl __do_div64;
-.align 4,0x90
-__do_div64:
-
-        @ Test for easy paths first.
-        subs    ip, r4, #1
-        bls     9f              @ divisor is 0 or 1
-        tst     ip, r4
-        beq     8f              @ divisor is power of 2
-
-        @ See if we need to handle upper 32-bit result.
-        cmp     xh, r4
-        mov     yh, #0
-        blo     3f
-
-        @ Align divisor with upper part of dividend.
-        @ The aligned divisor is stored in yl preserving the original.
-        @ The bit position is stored in ip.
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-        clz     yl, r4
-        clz     ip, xh
-        sub     yl, yl, ip
-        mov     ip, #1
-        mov     ip, ip, lsl yl
-        mov     yl, r4, lsl yl
-
-#else
-
-        mov     yl, r4
-        mov     ip, #1
-1:      cmp     yl, #0x80000000
-        cmpcc   yl, xh
-        movcc   yl, yl, lsl #1
-        movcc   ip, ip, lsl #1
-        bcc     1b
-
-#endif
-
-        @ The division loop for needed upper bit positions.
-        @ Break out early if dividend reaches 0.
-2:      cmp     xh, yl
-        orrcs   yh, yh, ip
-        subcss  xh, xh, yl
-        movnes  ip, ip, lsr #1
-        mov     yl, yl, lsr #1
-        bne     2b
-
-        @ See if we need to handle lower 32-bit result.
-3:      cmp     xh, #0
-        mov     yl, #0
-        cmpeq   xl, r4
-        movlo   xh, xl
-        movlo   pc, lr
-
-        @ The division loop for lower bit positions.
-        @ Here we shift remainer bits leftwards rather than moving the
-        @ divisor for comparisons, considering the carry-out bit as well.
-        mov     ip, #0x80000000
-4:      movs    xl, xl, lsl #1
-        adcs    xh, xh, xh
-        beq     6f
-        cmpcc   xh, r4
-5:      orrcs   yl, yl, ip
-        subcs   xh, xh, r4
-        movs    ip, ip, lsr #1
-        bne     4b
-        mov     pc, lr
-
-        @ The top part of remainder became zero. If carry is set
-        @ (the 33th bit) this is a false positive so resume the loop.
-        @ Otherwise, if lower part is also null then we are done.
-6:      bcs     5b
-        cmp     xl, #0
-        moveq   pc, lr
-
-        @ We still have remainer bits in the low part. Bring them up.
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-        clz     xh, xl          @ we know xh is zero here so...
-        add     xh, xh, #1
-        mov     xl, xl, lsl xh
-        mov     ip, ip, lsr xh
-
-#else
-
-7:      movs    xl, xl, lsl #1
-        mov     ip, ip, lsr #1
-        bcc     7b
-
-#endif
-
-        @ Current remainder is now 1. It is worthless to compare with
-        @ divisor at this point since divisor can not be smaller than 3 here.
-        @ If possible, branch for another shift in the division loop.
-        @ If no bit position left then we are done.
-        movs    ip, ip, lsr #1
-        mov     xh, #1
-        bne     4b
-        mov     pc, lr
-
-8:      @ Division by a power of 2: determine what that divisor order is
-        @ then simply shift values around
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-        clz     ip, r4
-        rsb     ip, ip, #31
-
-#else
-
-        mov     yl, r4
-        cmp     r4, #(1 << 16)
-        mov     ip, #0
-        movhs   yl, yl, lsr #16
-        movhs   ip, #16
-
-        cmp     yl, #(1 << 8)
-        movhs   yl, yl, lsr #8
-        addhs   ip, ip, #8
-
-        cmp     yl, #(1 << 4)
-        movhs   yl, yl, lsr #4
-        addhs   ip, ip, #4
-
-        cmp     yl, #(1 << 2)
-        addhi   ip, ip, #3
-        addls   ip, ip, yl, lsr #1
-
-#endif
-
-        mov     yh, xh, lsr ip
-        mov     yl, xl, lsr ip
-        rsb     ip, ip, #32
-        orr     yl, yl, xh, lsl ip
-        mov     xh, xl, lsl ip
-        mov     xh, xh, lsr ip
-        mov     pc, lr
-
-        @ eq -> division by 1: obvious enough...
-9:      moveq   yl, xl
-        moveq   yh, xh
-        moveq   xh, #0
-        moveq   pc, lr
-
-        @ Division by 0:
-        str     lr, [sp, #-8]!
-        bl      __div0
-
-        @ as wrong as it could be...
-        mov     yl, #0
-        mov     yh, #0
-        mov     xh, #0
-        ldr     pc, [sp], #8
-
-@.type __do_div64, @function;
-@.size __do_div64, .-__do_div64,
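Note: the deleted __do_div64 above is binary long division with a deliberately non-standard register convention (dividend in xh-xl, divisor in r4, quotient in yh-yl, remainder in xh). Purely as an illustration, and not code from this commit or from libgcc, here is the same shift-and-subtract idea written in portable C; after this change, roughly equivalent work is done inside libgcc's division helpers instead:

#include <stdint.h>

/* Illustration only: 64-bit dividend / 32-bit divisor via restoring
 * (shift-and-subtract) division, returning the quotient and remainder. */
static uint64_t udiv64_32(uint64_t n, uint32_t base, uint32_t *rem)
{
        uint64_t quot = 0, r = 0;
        int i;

        for (i = 63; i >= 0; i--) {
                r = (r << 1) | ((n >> i) & 1);  /* bring down the next dividend bit */
                if (r >= base) {
                        r -= base;
                        quot |= 1ULL << i;
                }
        }
        *rem = (uint32_t)r;
        return quot;
}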
@@ -354,9 +354,9 @@ endif
 $(objcbfs)/romstage_null.debug: $$(romstage-objs) $(objgenerated)/romstage_null.ld
        @printf " LINK $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
-       $(LD) -nostdlib -nostartfiles -static -o $@ -L$(obj) $(romstage-objs) -T $(objgenerated)/romstage_null.ld
+       $(LD) -nostdlib -nostartfiles -static -o $@ -L$(obj) --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --start-group $(romstage-objs) $(LIBGCC_FILE_NAME) --end-group -T $(objgenerated)/romstage_null.ld
 else
-       $(CC) -nostdlib -nostartfiles -static -o $@ -L$(obj) -T $(objgenerated)/romstage_null.ld $(romstage-objs)
+       $(CC) -nostdlib -nostartfiles -static -o $@ -L$(obj) -T $(objgenerated)/romstage_null.ld -Wl,--wrap,__divdi3 -Wl,--wrap,__udivdi3 -Wl,--wrap,__moddi3 -Wl,--wrap,__umoddi3 -Wl,--start-group $(romstage-objs) $(LIBGCC_FILE_NAME) -Wl,--end-group
 endif
        $(NM) $@ | grep -q " [DdBb] "; if [ $$? -eq 0 ]; then \
                echo "Forbidden global variables in romstage:"; \
@@ -366,9 +366,9 @@ endif
 $(objcbfs)/romstage_xip.debug: $$(romstage-objs) $(objgenerated)/romstage_xip.ld
        @printf " LINK $(subst $(obj)/,,$(@))\n"
 ifeq ($(CONFIG_COMPILER_LLVM_CLANG),y)
-       $(LD) -nostdlib -nostartfiles -static -o $@ -L$(obj) $(romstage-objs) -T $(objgenerated)/romstage_xip.ld
+       $(LD) -nostdlib -nostartfiles -static -o $@ -L$(obj) --wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3 --start-group $(romstage-objs) $(LIBGCC_FILE_NAME) --end-group -T $(objgenerated)/romstage_xip.ld
 else
-       $(CC) -nostdlib -nostartfiles -static -o $@ -L$(obj) -T $(objgenerated)/romstage_xip.ld $(romstage-objs)
+       $(CC) -nostdlib -nostartfiles -static -o $@ -L$(obj) -T $(objgenerated)/romstage_xip.ld -Wl,--wrap,__divdi3 -Wl,--wrap,__udivdi3 -Wl,--wrap,__moddi3 -Wl,--wrap,__umoddi3 -Wl,--start-group $(romstage-objs) $(LIBGCC_FILE_NAME) -Wl,--end-group
 endif
 
 $(objgenerated)/romstage_null.ld: $$(ldscripts) $(obj)/ldoptions
@@ -1,30 +0,0 @@
-#ifndef __I386_DIV64
-#define __I386_DIV64
-
-/*
- * do_div() is NOT a C function. It wants to return
- * two values (the quotient and the remainder), but
- * since that doesn't work very well in C, what it
- * does is:
- *
- * - modifies the 64-bit dividend _in_place_
- * - returns the 32-bit remainder
- *
- * This ends up being the most efficient "calling
- * convention" on x86.
- */
-#define do_div(n,base) ({ \
-        unsigned long __upper, __low, __high, __mod, __base; \
-        __base = (base); \
-        asm("":"=a" (__low), "=d" (__high):"A" (n)); \
-        __upper = __high; \
-        if (__high) { \
-                __upper = __high % (__base); \
-                __high = __high / (__base); \
-        } \
-        asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
-        asm("":"=A" (n):"a" (__low),"d" (__high)); \
-        __mod; \
-})
-
-#endif
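Note: the x86 header removed above divides the high 32 bits first and then the low 32 bits together with that remainder, so each hardware divl's quotient stays within 32 bits. A hedged C rendering of that two-step structure, as an illustration only (the real macro kept everything in registers via inline asm):

#include <stdint.h>

/* Illustration only: the two-step split behind the removed x86 do_div().
 * Dividing the high word first guarantees the second divide's quotient
 * fits in 32 bits. */
static uint32_t do_div_x86_style(uint64_t *n, uint32_t base)
{
        uint32_t hi = *n >> 32, lo = (uint32_t)*n;
        uint32_t q_hi = hi / base;
        uint32_t r_hi = hi % base;
        uint64_t merged = ((uint64_t)r_hi << 32) | lo;  /* quotient of merged/base < 2^32 */
        uint32_t q_lo = (uint32_t)(merged / base);
        uint32_t rem = (uint32_t)(merged % base);

        *n = ((uint64_t)q_hi << 32) | q_lo;
        return rem;
}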
@@ -5,7 +5,6 @@
  */
 
 #include <string.h>
-#include <div64.h>
 #include <console/console.h>
 #include <console/vtxprintf.h>
 
@@ -70,20 +69,8 @@ static int number(void (*tx_byte)(unsigned char byte),
        if (num == 0)
                tmp[i++]='0';
        else while (num != 0){
-               /* there are some nice optimizations in the
-                * Macros-From-Hell that form the div64 code
-                * *IF* you call it with a constant.
-                * We're firmware, we only do bases
-                * 8, 10, and 16. Let's be smart.
-                * This greatly helps ARM, reduces the
-                * code footprint at compile time, and does not hurt x86.
-                */
-               if (base == 10)
-                       tmp[i++] = digits[do_div(num,10)];
-               else if (base == 8)
-                       tmp[i++] = digits[do_div(num,8)];
-               else /* sorry, you're out of choices */
-                       tmp[i++] = digits[do_div(num,16)];
+               tmp[i++] = digits[num % base];
+               num /= base;
        }
        if (i > precision)
                precision = i;
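Note: with do_div() gone, the 64-bit `num % base` and `num /= base` added above are the operations that make the stages need libgcc at link time, which is why the link rules in this change add $(LIBGCC_FILE_NAME) and wrap __divdi3/__udivdi3/__moddi3/__umoddi3. A hedged sketch of the digit loop in isolation, not code from this commit:

#include <stdint.h>

/* Illustration only: the digit-extraction loop above, as a standalone helper.
 * On 32-bit targets each 64-bit '%' and '/' typically compiles to a call into
 * libgcc rather than inline code. */
static int format_digits(uint64_t num, unsigned int base, char tmp[64], const char *digits)
{
        int i = 0;

        if (num == 0)
                tmp[i++] = '0';
        else while (num != 0) {
                tmp[i++] = digits[num % base];
                num /= base;
        }
        return i;       /* digits produced, least significant first */
}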
@@ -36,8 +36,8 @@ endif
 smm-y += smihandler.c
 smm-y += smiutil.c
 
-$(obj)/cpu/x86/smm/smm.o: $$(smm-objs)
-       $(CC) $(LDFLAGS) -nostdlib -r -o $@ $^
+$(obj)/cpu/x86/smm/smm.o: $$(smm-objs) $(LIBGCC_FILE_NAME)
+       $(CC) $(LDFLAGS) -nostdlib -r -o $@ -Wl,--wrap,__divdi3 -Wl,--wrap,__udivdi3 -Wl,--wrap,__moddi3 -Wl,--wrap,__umoddi3 -Wl,--start-group $(smm-objs) $(LIBGCC_FILE_NAME) -Wl,--end-group
 
 $(obj)/cpu/x86/smm/smm_wrap: $(obj)/cpu/x86/smm/smm.o $(src)/cpu/x86/smm/$(SMM_LDSCRIPT) $(obj)/ldoptions
        $(CC) $(SMM_LDFLAGS) -nostdlib -nostartfiles -static -o $(obj)/cpu/x86/smm/smm.elf -T $(src)/cpu/x86/smm/$(SMM_LDSCRIPT) $(obj)/cpu/x86/smm/smm.o
@@ -48,6 +48,7 @@ romstage-$(CONFIG_USBDEBUG) += usbdebug.c
 romstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c cbmem.c
 romstage-y += compute_ip_checksum.c
 romstage-y += memmove.c
+romstage-$(CONFIG_ARCH_X86) += gcc.c
 
 ramstage-y += hardwaremain.c
 ramstage-y += selfboot.c
@@ -94,6 +95,7 @@ smm-y += cbfs.c memcmp.c
 smm-$(CONFIG_CONSOLE_SERIAL8250) += uart8250.c
 smm-$(CONFIG_CONSOLE_SERIAL8250MEM) += uart8250mem.c
 smm-$(CONFIG_USBDEBUG) += usbdebug.c
+smm-y += gcc.c
 
 $(obj)/lib/version.ramstage.o : $(obj)/build.h
 