From 66a476ad5f29553ad7c46e58eb35faa7a059a5af Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Mon, 12 Oct 2015 16:45:21 -0700 Subject: [PATCH] arm64: Implement generic stage transitions for non-Tegra SoCs The existing arm64 architecture code has been developed for the Tegra132 and Tegra210 SoCs, which only start their ARM64 cores in ramstage. It interweaves the stage entry point with code that initializes a CPU (and should not be run again if that CPU already ran a previous stage). It also still contains some vestiges of SMP/secmon support (such as setting up stacks in the BSS instead of using the stage-persistent one from memlayout). This patch splits those functions apart and makes the code layout similar to how things work on ARM32. The default stage_entry() symbol is a no-op wrapper that just calls main() for the current stage, for the normal case where a stage ran on the same core as the last one. It can be overridden by SoC code to support special cases like Tegra. The CPU initialization code is split out into armv8/cpu.S (similar to what arm_init_caches() does for ARM32) and called by the default bootblock entry code. SoCs where a CPU starts up in a later stage can call the same code from a stage_entry() override instead. The Tegra132 and Tegra210 code is not touched by this patch to make it easier to review and validate. A follow-up patch will bring those SoCs in line with the model. BRANCH=None BUG=None TEST=Booted Oak with a single mmu_init()/mmu_enable(). Built Ryu and Smaug. 
Change-Id: I28302a6ace47e8ab7a736e089f64922cef1a2f93 Signed-off-by: Julius Werner Reviewed-on: http://review.coreboot.org/12077 Tested-by: build bot (Jenkins) Reviewed-by: Aaron Durbin --- src/Kconfig | 4 +- src/arch/arm64/Makefile.inc | 22 ++----- src/arch/arm64/armv8/Makefile.inc | 3 + src/arch/arm64/armv8/bootblock.S | 36 ++++++----- src/arch/arm64/armv8/cpu.S | 62 ++++++++----------- src/arch/arm64/armv8/exception.c | 20 +++--- src/arch/arm64/boot.c | 8 +++ src/arch/arm64/include/arch/header.ld | 6 +- src/arch/arm64/include/arch/memlayout.h | 8 +-- src/arch/arm64/include/armv8/arch/exception.h | 4 +- src/arch/arm64/stages.c | 44 ------------- src/arch/arm64/transition_asm.S | 13 +++- src/soc/nvidia/tegra132/Kconfig | 5 ++ src/soc/nvidia/tegra210/Kconfig | 5 ++ 14 files changed, 102 insertions(+), 138 deletions(-) delete mode 100644 src/arch/arm64/stages.c diff --git a/src/Kconfig b/src/Kconfig index 35628877f7..98d553a70b 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -437,8 +437,8 @@ config HEAP_SIZE config STACK_SIZE hex - default 0x0 if (ARCH_RAMSTAGE_ARM || ARCH_RAMSTAGE_MIPS || ARCH_RAMSTAGE_RISCV) - default 0x1000 + default 0x1000 if ARCH_X86 + default 0x0 config MAX_CPUS int diff --git a/src/arch/arm64/Makefile.inc b/src/arch/arm64/Makefile.inc index 8bcad75867..21fda04bda 100644 --- a/src/arch/arm64/Makefile.inc +++ b/src/arch/arm64/Makefile.inc @@ -33,16 +33,6 @@ ifeq ($(CONFIG_ARCH_ROMSTAGE_ARM64),y) CBFSTOOL_PRE1_OPTS = -m arm64 -s $(CONFIG_CBFS_SIZE) endif -ifeq ($(CONFIG_ARCH_ARM64),y) -stages_c = $(src)/arch/arm64/stages.c -stages_o = $(obj)/arch/arm64/stages.o - -$(stages_o): $(stages_c) $(obj)/config.h - @printf " CC $(subst $(obj)/,,$(@))\n" - $(CC_arm) -I. 
$(CPPFLAGS_arm) -c -o $@ $< -marm - -endif - ################################################################################ # bootblock ################################################################################ @@ -55,9 +45,7 @@ $(obj)/arch/arm64/id.bootblock.o: $(obj)/build.h bootblock-y += boot.c bootblock-y += c_entry.c -bootblock-y += stage_entry.S bootblock-y += cpu-stubs.c -bootblock-y += stages.c bootblock-y += eabi_compat.c bootblock-y += transition.c transition_asm.S @@ -89,7 +77,6 @@ verstage-y += eabi_compat.c verstage-y += ../../lib/memset.c verstage-y += ../../lib/memcpy.c verstage-y += ../../lib/memmove.c -verstage-y += stages.c endif # CONFIG_ARCH_VERSTAGE_ARM64 @@ -101,9 +88,7 @@ ifeq ($(CONFIG_ARCH_ROMSTAGE_ARM64),y) romstage-y += boot.c romstage-y += c_entry.c -romstage-y += stage_entry.S romstage-y += cpu-stubs.c -romstage-y += stages.c romstage-y += div0.c romstage-y += eabi_compat.c romstage-y += memset.S @@ -129,7 +114,6 @@ endif # CONFIG_ARCH_ROMSTAGE_ARM64 ifeq ($(CONFIG_ARCH_RAMSTAGE_ARM64),y) ramstage-y += c_entry.c -ramstage-y += stages.c ramstage-y += div0.c ramstage-y += eabi_compat.c ramstage-y += boot.c @@ -137,11 +121,15 @@ ramstage-y += tables.c ramstage-y += memset.S ramstage-y += memcpy.S ramstage-y += memmove.S -ramstage-y += stage_entry.S ramstage-y += cpu-stubs.c ramstage-$(CONFIG_ARM64_USE_ARM_TRUSTED_FIRMWARE) += arm_tf.c ramstage-y += transition.c transition_asm.S +# TODO: Replace this with a simpler ramstage entry point in soc/nvidia/tegra* +ifeq ($(CONFIG_SOC_NVIDIA_TEGRA132)$(CONFIG_SOC_NVIDIA_TEGRA210),y) +ramstage-y += stage_entry.S +endif + rmodules_arm64-y += memset.S rmodules_arm64-y += memcpy.S rmodules_arm64-y += memmove.S diff --git a/src/arch/arm64/armv8/Makefile.inc b/src/arch/arm64/armv8/Makefile.inc index 359a368ab9..f25a567b29 100644 --- a/src/arch/arm64/armv8/Makefile.inc +++ b/src/arch/arm64/armv8/Makefile.inc @@ -26,6 +26,9 @@ armv8_asm_flags = $(armv8_flags) 
################################################################################ ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV8_64),y) +ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y) +bootblock-y += bootblock.S +endif bootblock-y += cache.c bootblock-y += cache_helpers.S bootblock-y += cpu.S diff --git a/src/arch/arm64/armv8/bootblock.S b/src/arch/arm64/armv8/bootblock.S index a23a5f2788..4a9fea9af6 100644 --- a/src/arch/arm64/armv8/bootblock.S +++ b/src/arch/arm64/armv8/bootblock.S @@ -1,7 +1,7 @@ /* * Early initialization code for aarch64 (a.k.a. armv8) * - * Copyright 2013 Google Inc. + * Copyright 2015 Google Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -14,21 +14,23 @@ * GNU General Public License for more details. */ - .section ".id", "a", %progbits +#include - .globl __id_start -__id_start: -ver: - .asciz COREBOOT_VERSION -vendor: - .asciz CONFIG_MAINBOARD_VENDOR -part: - .asciz CONFIG_MAINBOARD_PART_NUMBER -.long __id_end - ver /* Reverse offset to the vendor id */ -.long __id_end - vendor /* Reverse offset to the vendor id */ -.long __id_end - part /* Reverse offset to the part number */ -.long CONFIG_ROM_SIZE /* Size of this romimage */ - .globl __id_end +ENTRY(_start) + /* Initialize PSTATE, SCTLR and caches to clean state. */ + bl arm64_init_cpu -__id_end: -.previous + /* Initialize stack with sentinel value to later check overflow. */ + ldr x0, =_stack + ldr x1, =_estack + ldr x2, =0xdeadbeefdeadbeef +stack_init_loop: + stp x2, x2, [x0], #16 + cmp x0, x1 + bne stack_init_loop + + /* Leave a line of beef dead for easier visibility in stack dumps. */ + sub sp, x0, #16 + + bl main +ENDPROC(_start) diff --git a/src/arch/arm64/armv8/cpu.S b/src/arch/arm64/armv8/cpu.S index c248cb3a81..4713ca59f9 100644 --- a/src/arch/arm64/armv8/cpu.S +++ b/src/arch/arm64/armv8/cpu.S @@ -19,27 +19,40 @@ /* * Bring an ARMv8 processor we just gained control of (e.g. 
from IROM) into a - * known state regarding caches/SCTLR. Completely cleans and invalidates + * known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates * icache/dcache, disables MMU and dcache (if active), and enables unaligned - * accesses, icache and branch prediction (if inactive). Clobbers x4 and x5. + * accesses, icache and branch prediction (if inactive). Clobbers R22 and R23. */ -ENTRY(arm_init_caches) - /* w4: SCTLR, return address: x8 (stay valid for the whole function) */ - mov x8, x30 - /* XXX: Assume that we always start running at EL3 */ - mrs x4, sctlr_el3 +ENTRY(arm64_init_cpu) + /* Initialize PSTATE (unmask all exceptions, select SP_EL0). */ + msr SPSel, #0 + msr DAIFClr, #0xf - /* FIXME: How to enable branch prediction on ARMv8? */ + /* TODO: This is where we'd put non-boot CPUs into WFI if needed. */ + + /* x22: SCTLR, return address: x23 (callee-saved by subroutine) */ + mov x23, x30 + /* TODO: Assert that we always start running at EL3 */ + mrs x22, sctlr_el3 + + /* Activate ICache (12) already for speed during cache flush below. 
*/ + orr x22, x22, #(1 << 12) + msr sctlr_el3, x22 + isb /* Flush and invalidate dcache */ mov x0, #DCCISW bl flush_dcache_all /* Deactivate MMU (0), Alignment Check (1) and DCache (2) */ - and x4, x4, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2) - /* Activate ICache (12) already for speed */ - orr x4, x4, #(1 << 12) - msr sctlr_el3, x4 + and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2) + /* Activate Stack Alignment (3) because why not */ + orr x22, x22, #(1 << 3) + /* Set to little-endian (25) */ + and x22, x22, # ~(1 << 25) + /* Deactivate write-xor-execute enforcement (19) */ + and x22, x22, # ~(1 << 19) + msr sctlr_el3, x22 /* Invalidate icache and TLB for good measure */ ic iallu @@ -47,26 +60,5 @@ ENTRY(arm_init_caches) dsb sy isb - ret x8 -ENDPROC(arm_init_caches) - -/* Based on u-boot transition.S */ -ENTRY(switch_el3_to_el2) - mov x0, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ - msr scr_el3, x0 - msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */ - mov x0, #0x33ff - msr cptr_el2, x0 /* Disable coprocessor traps to EL2 */ - - /* Return to the EL2_SP2 mode from EL3 */ - mov x0, sp - msr sp_el2, x0 /* Migrate SP */ - mrs x0, vbar_el3 - msr vbar_el2, x0 /* Migrate VBAR */ - mrs x0, sctlr_el3 - msr sctlr_el2, x0 /* Migrate SCTLR */ - mov x0, #0x3c9 - msr spsr_el3, x0 /* EL2_SP2 | D | A | I | F */ - msr elr_el3, x30 - eret -ENDPROC(switch_el3_to_el2) + ret x23 +ENDPROC(arm64_init_cpu) diff --git a/src/arch/arm64/armv8/exception.c b/src/arch/arm64/armv8/exception.c index afbaf6da78..35e3f7fac6 100644 --- a/src/arch/arm64/armv8/exception.c +++ b/src/arch/arm64/armv8/exception.c @@ -36,6 +36,8 @@ #include #include +uint8_t exception_stack[0x200] __attribute__((aligned(16))); + static const char *exception_names[NUM_EXC_VIDS] = { [EXC_VID_CUR_SP_EL0_SYNC] = "_sync_sp_el0", [EXC_VID_CUR_SP_EL0_IRQ] = "_irq_sp_el0", @@ -193,19 +195,17 @@ static uint64_t test_exception(void) return 0; } -void exception_hwinit(void) -{ - exc_set_vbar(); -} - void 
exception_init(void) { - /* Load the exception table. */ - exception_hwinit(); + /* Load the exception table and initialize SP_EL3. */ + exception_init_asm(exception_stack + ARRAY_SIZE(exception_stack)); printk(BIOS_DEBUG, "ARM64: Exception handlers installed.\n"); - printk(BIOS_DEBUG, "ARM64: Testing exception\n"); - test_exception(); - printk(BIOS_DEBUG, "ARM64: Done test exception\n"); + /* Only spend time testing on debug builds that are trying to detect more errors. */ + if (IS_ENABLED(CONFIG_FATAL_ASSERTS)) { + printk(BIOS_DEBUG, "ARM64: Testing exception\n"); + test_exception(); + printk(BIOS_DEBUG, "ARM64: Done test exception\n"); + } } diff --git a/src/arch/arm64/boot.c b/src/arch/arm64/boot.c index 10d72576d9..1012d97a0a 100644 --- a/src/arch/arm64/boot.c +++ b/src/arch/arm64/boot.c @@ -71,3 +71,11 @@ void arch_prog_run(struct prog *prog) doit(prog_entry_arg(prog)); } + +#if !IS_ENABLED(CONFIG_SOC_NVIDIA_TEGRA132) +/* Generic stage entry point. Can be overridden by board/SoC if needed. */ +__attribute__((weak)) void stage_entry(void) +{ + main(); +} +#endif diff --git a/src/arch/arm64/include/arch/header.ld b/src/arch/arm64/include/arch/header.ld index 2ea0a19304..c82cb3f8c7 100644 --- a/src/arch/arm64/include/arch/header.ld +++ b/src/arch/arm64/include/arch/header.ld @@ -24,11 +24,7 @@ PHDRS to_load PT_LOAD; } -#if ENV_BOOTBLOCK -TARGET(binary) -#endif - -#if ENV_RMODULE +#if ENV_BOOTBLOCK || ENV_RMODULE ENTRY(_start) #else ENTRY(stage_entry) diff --git a/src/arch/arm64/include/arch/memlayout.h b/src/arch/arm64/include/arch/memlayout.h index ea4a1ba80c..6db67a9dee 100644 --- a/src/arch/arm64/include/arch/memlayout.h +++ b/src/arch/arm64/include/arch/memlayout.h @@ -22,13 +22,13 @@ REGION(ttb, addr, size, 4K) \ _ = ASSERT(size % 4K == 0, "TTB size must be divisible by 4K!"); -/* ARM64 stacks need 16-byte alignment. The ramstage will set up its own stacks - * in BSS, so this is only used for the SRAM stages. 
*/ -#ifdef __PRE_RAM__ +/* ARM64 stacks need 16-byte alignment. */ +#if !(IS_ENABLED(CONFIG_SOC_NVIDIA_TEGRA132) || \ + IS_ENABLED(CONFIG_SOC_NVIDIA_TEGRA210)) #define STACK(addr, size) \ REGION(stack, addr, size, 16) \ _ = ASSERT(size >= 2K, "stack should be >= 2K, see toolchain.inc"); -#else +#else /* Hack around old Tegra stage_entry.S implementation. TODO: remove */ #define STACK(addr, size) REGION(preram_stack, addr, size, 16) #endif diff --git a/src/arch/arm64/include/armv8/arch/exception.h b/src/arch/arm64/include/armv8/arch/exception.h index 49ea747700..0ec0eea953 100644 --- a/src/arch/arm64/include/armv8/arch/exception.h +++ b/src/arch/arm64/include/armv8/arch/exception.h @@ -33,9 +33,11 @@ #include /* Initialize the exception handling on the current CPU. */ -void exception_hwinit(void); void exception_init(void); +/* Initialize VBAR and SP_EL3. */ +void exception_init_asm(void *exception_stack_end); + /* * Order matters for handling return values. The larger the value the higher * the precedence. diff --git a/src/arch/arm64/stages.c b/src/arch/arm64/stages.c deleted file mode 100644 index 4d4217c099..0000000000 --- a/src/arch/arm64/stages.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This file is part of the coreboot project. - * - * Copyright 2014 Google Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* - * This file contains entry/exit functions for each stage during coreboot - * execution (bootblock entry and ramstage exit will depend on external - * loading). 
- * - * Entry points must be placed at the location the previous stage jumps - * to (the lowest address in the stage image). This is done by giving - * stage_entry() its own section in .text and placing it first in the - * linker script. - */ - -#include -#include - -/* we had marked 'doit' as 'noreturn'. - * There is no apparent harm in leaving it as something we can return from, and in the one - * case where we call a payload, the payload is allowed to return. - * Hence, leave it as something we can return from. - */ -void stage_exit(void *addr) -{ - void (*doit)(void) = addr; - /* - * Most stages load code so we need to sync caches here. Should maybe - * go into cbfs_load_stage() instead... - */ - cache_sync_instructions(); - doit(); -} diff --git a/src/arch/arm64/transition_asm.S b/src/arch/arm64/transition_asm.S index 24eab13fa2..8fd629a8af 100644 --- a/src/arch/arm64/transition_asm.S +++ b/src/arch/arm64/transition_asm.S @@ -155,11 +155,18 @@ ENTRY(exc_exit) eret ENDPROC(exc_exit) -/* exc_set_vbar: Initialize the exception entry address in vbar */ -ENTRY(exc_set_vbar) +/* + * exception_init_asm: Initialize VBAR and point SP_EL3 to exception stack. 
+ * x0 = end of exception stack + */ +ENTRY(exception_init_asm) + msr SPSel, #SPSR_USE_H + mov sp, x0 + msr SPSel, #SPSR_USE_L + adr x0, exc_vectors write_current vbar, x0, x1 dsb sy isb ret -ENDPROC(exc_set_vbar) +ENDPROC(exception_init_asm) diff --git a/src/soc/nvidia/tegra132/Kconfig b/src/soc/nvidia/tegra132/Kconfig index 4be9a4c2a8..8473d4ac5b 100644 --- a/src/soc/nvidia/tegra132/Kconfig +++ b/src/soc/nvidia/tegra132/Kconfig @@ -17,6 +17,11 @@ config SOC_NVIDIA_TEGRA132 if SOC_NVIDIA_TEGRA132 +# TODO: Remove after replacing arch/arm64/stage_entry.S +config STACK_SIZE + hex + default 0x1000 + config MAINBOARD_DO_DSI_INIT bool "Use dsi graphics interface" depends on MAINBOARD_DO_NATIVE_VGA_INIT diff --git a/src/soc/nvidia/tegra210/Kconfig b/src/soc/nvidia/tegra210/Kconfig index e70cdc6517..002a6d3806 100644 --- a/src/soc/nvidia/tegra210/Kconfig +++ b/src/soc/nvidia/tegra210/Kconfig @@ -22,6 +22,11 @@ if SOC_NVIDIA_TEGRA210 config CHROMEOS select CHROMEOS_RAMOOPS_NON_ACPI +# TODO: Remove after replacing arch/arm64/stage_entry.S +config STACK_SIZE + hex + default 0x1000 + config MAINBOARD_DO_DSI_INIT bool "Use dsi graphics interface" depends on MAINBOARD_DO_NATIVE_VGA_INIT