coreboot-kgpe-d16/src/lib/Makefile.inc


#
# This file is part of the coreboot project.
#
# Copyright (C) 2009 coresystems GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
subdirs-y += gnat
ifeq ($(CONFIG_UBSAN),y)
ramstage-y += ubsan.c
CFLAGS_ramstage += -fsanitize=undefined
endif
decompressor-y += decompressor.c
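# The decompressor stage carries the LZ4-compressed bootblock in its data
# section, so decompressor.c's object must be rebuilt whenever bootblock.lz4
# changes, and CCACHE_EXTRAFILES makes ccache include that binary in its hash.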
$(call src-to-obj,decompressor,$(dir)/decompressor.c): $(objcbfs)/bootblock.lz4
$(call src-to-obj,decompressor,$(dir)/decompressor.c): CCACHE_EXTRAFILES=$(objcbfs)/bootblock.lz4
# Must reset CCACHE_EXTRAFILES or make applies it transitively to dependencies.
$(objcbfs)/bootblock.lz4: CCACHE_EXTRAFILES=
decompressor-y += delay.c
decompressor-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
decompressor-y += memchr.c
decompressor-y += memcmp.c
decompressor-y += prog_ops.c
decompressor-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
bootblock-y += bootblock.c
endif
bootblock-y += prog_loaders.c
bootblock-y += prog_ops.c
bootblock-y += cbfs.c
bootblock-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
bootblock-y += libgcc.c
bootblock-$(CONFIG_GENERIC_UDELAY) += timer.c
ifeq ($(CONFIG_EARLY_CBMEM_INIT),y)
bootblock-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
endif
bootblock-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
bootblock-y += delay.c
bootblock-y += memchr.c
bootblock-y += memcmp.c
bootblock-y += boot_device.c
bootblock-y += fmap.c
verstage-y += prog_loaders.c
verstage-y += prog_ops.c
verstage-y += delay.c
verstage-y += cbfs.c
verstage-y += halt.c
verstage-y += fmap.c
verstage-y += libgcc.c
verstage-y += memcmp.c
verstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
verstage-y += boot_device.c
verstage-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
verstage-$(CONFIG_GENERIC_UDELAY) += timer.c
verstage-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
romstage-y += prog_loaders.c
romstage-y += prog_ops.c
romstage-y += memchr.c
romstage-y += memcmp.c
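# Relocatable modules are built per architecture, so add the shared sources
# and linker script to every rmodules_<arch> class.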
$(foreach arch,$(ARCH_SUPPORTED),\
$(eval rmodules_$(arch)-y += memcmp.c) \
$(eval rmodules_$(arch)-y += rmodule.ld))
romstage-y += fmap.c
romstage-y += delay.c
romstage-y += cbfs.c
romstage-$(CONFIG_COMPRESS_RAMSTAGE) += lzma.c lzmadecode.c
romstage-y += libgcc.c
romstage-y += memrange.c
romstage-$(CONFIG_PRIMITIVE_MEMTEST) += primitive_memtest.c
ramstage-$(CONFIG_PRIMITIVE_MEMTEST) += primitive_memtest.c
romstage-$(CONFIG_CACHE_AS_RAM) += ramtest.c
romstage-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
ramstage-y += region_file.c
romstage-y += region_file.c
ramstage-y += romstage_handoff.c
romstage-y += romstage_handoff.c
romstage-y += romstage_stack.c
ramstage-y += romstage_stack.c
romstage-y += stack.c
ramstage-y += rtc.c
ifeq ($(CONFIG_EARLY_CBMEM_INIT),y)
romstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
romstage-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
endif
romstage-y += compute_ip_checksum.c
ifeq ($(CONFIG_COMPILER_GCC),y)
bootblock-$(CONFIG_ARCH_BOOTBLOCK_X86_32) += gcc.c
verstage-$(CONFIG_ARCH_VERSTAGE_X86_32) += gcc.c
romstage-$(CONFIG_ARCH_ROMSTAGE_X86_32) += gcc.c
ramstage-$(CONFIG_ARCH_RAMSTAGE_X86_32) += gcc.c
smm-$(CONFIG_ARCH_RAMSTAGE_X86_32) += gcc.c
endif
romstage-$(CONFIG_GENERIC_UDELAY) += timer.c
ramstage-y += prog_loaders.c
ramstage-y += prog_ops.c
ramstage-y += hardwaremain.c
ramstage-y += selfboot.c
ramstage-y += coreboot_table.c
ramstage-y += bootmem.c
ramstage-y += fmap.c
ramstage-y += memchr.c
ramstage-y += memcmp.c
ramstage-y += malloc.c
smm-$(CONFIG_SMM_TSEG) += malloc.c
ramstage-y += dimm_info_util.c
ramstage-y += delay.c
ramstage-y += fallback_boot.c
ramstage-y += compute_ip_checksum.c
ramstage-y += cbfs.c
ramstage-y += lzma.c lzmadecode.c
ramstage-y += stack.c
ramstage-y += hexstrtobin.c
ramstage-y += wrdd.c
ramstage-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
ramstage-$(CONFIG_BOOTSPLASH) += jpeg.c
ramstage-$(CONFIG_TRACE) += trace.c
ramstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
ramstage-$(CONFIG_COVERAGE) += libgcov.c
ramstage-y += edid.c
ifneq ($(CONFIG_NO_EDID_FILL_FB),y)
ramstage-y += edid_fill_fb.c
endif
ramstage-y += memrange.c
ramstage-$(CONFIG_COOP_MULTITASKING) += thread.c
ramstage-$(CONFIG_TIMER_QUEUE) += timer_queue.c
ramstage-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
ramstage-$(CONFIG_GENERIC_UDELAY) += timer.c
ramstage-y += b64_decode.c
ramstage-$(CONFIG_ACPI_NHLT) += nhlt.c
ramstage-y += list.c
ramstage-$(CONFIG_FLATTENED_DEVICE_TREE) += device_tree.c
ramstage-$(CONFIG_PAYLOAD_FIT_SUPPORT) += fit.c
ramstage-$(CONFIG_PAYLOAD_FIT_SUPPORT) += fit_payload.c
romstage-y += cbmem_common.c
romstage-y += imd_cbmem.c
romstage-y += imd.c
ramstage-y += cbmem_common.c
ramstage-y += imd_cbmem.c
ramstage-y += imd.c
postcar-y += cbmem_common.c
postcar-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
postcar-y += imd_cbmem.c
postcar-y += imd.c
postcar-y += romstage_handoff.c
bootblock-y += hexdump.c
ramstage-y += hexdump.c
romstage-y += hexdump.c
verstage-y += hexdump.c
smm-y += hexdump.c
bootblock-$(CONFIG_REG_SCRIPT) += reg_script.c
verstage-$(CONFIG_REG_SCRIPT) += reg_script.c
romstage-$(CONFIG_REG_SCRIPT) += reg_script.c
ramstage-$(CONFIG_REG_SCRIPT) += reg_script.c
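# Stage cache backing store: either a region outside CBMEM or CBMEM itself,
# depending on the Kconfig selection below.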
ifeq ($(CONFIG_CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM),y)
ramstage-y += ext_stage_cache.c
romstage-y += ext_stage_cache.c
postcar-y += ext_stage_cache.c
else
ramstage-$(CONFIG_RELOCATABLE_RAMSTAGE) += cbmem_stage_cache.c
romstage-$(CONFIG_RELOCATABLE_RAMSTAGE) += cbmem_stage_cache.c
postcar-$(CONFIG_RELOCATABLE_RAMSTAGE) += cbmem_stage_cache.c
endif
romstage-y += boot_device.c
ramstage-y += boot_device.c
smm-y += boot_device.c
smm-y += delay.c
smm-y += fmap.c
smm-y += cbfs.c memcmp.c
smm-$(CONFIG_GENERIC_UDELAY) += timer.c
bootblock-y += version.c
romstage-y += version.c
ramstage-y += version.c
smm-y += version.c
verstage-y += version.c
postcar-y += version.c
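# version.c embeds data from the generated build.h, and cbfs.c/fmap.c depend
# on the generated fmap_config.h, so list those headers as explicit
# prerequisites for every stage's object.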
$(call src-to-obj,bootblock,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,romstage,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,ramstage,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,smm,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,verstage,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,postcar,$(dir)/version.c) : $(obj)/build.h
$(call src-to-obj,bootblock,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,romstage,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,ramstage,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,smm,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,verstage,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,postcar,$(dir)/cbfs.c) : $(obj)/fmap_config.h
$(call src-to-obj,bootblock,$(dir)/fmap.c) : $(obj)/fmap_config.h
$(call src-to-obj,romstage,$(dir)/fmap.c) : $(obj)/fmap_config.h
$(call src-to-obj,ramstage,$(dir)/fmap.c) : $(obj)/fmap_config.h
$(call src-to-obj,smm,$(dir)/fmap.c) : $(obj)/fmap_config.h
$(call src-to-obj,verstage,$(dir)/fmap.c) : $(obj)/fmap_config.h
$(call src-to-obj,postcar,$(dir)/fmap.c) : $(obj)/fmap_config.h
bootblock-y += bootmode.c
romstage-y += bootmode.c
ramstage-y += bootmode.c
verstage-y += bootmode.c
decompressor-y += halt.c
bootblock-y += halt.c
romstage-y += halt.c
ramstage-y += halt.c
smm-y += halt.c
decompressor-y += reset.c
bootblock-y += reset.c
verstage-y += reset.c
romstage-y += reset.c
postcar-y += reset.c
ramstage-y += reset.c
smm-y += reset.c
postcar-y += bootmode.c
postcar-y += boot_device.c
postcar-y += cbfs.c
postcar-y += delay.c
postcar-y += fmap.c
postcar-y += gcc.c
postcar-y += halt.c
postcar-y += libgcc.c
postcar-$(CONFIG_COMPRESS_RAMSTAGE) += lzma.c lzmadecode.c
postcar-y += memchr.c
postcar-y += memcmp.c
postcar-y += prog_loaders.c
postcar-y += prog_ops.c
postcar-y += rmodule.c
postcar-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
postcar-$(CONFIG_GENERIC_UDELAY) += timer.c
# Use program.ld for all the platforms which use C for the bootblock.
bootblock-$(CONFIG_C_ENVIRONMENT_BOOTBLOCK) += program.ld
decompressor-y += program.ld
postcar-y += program.ld
romstage-y += program.ld
ramstage-y += program.ld
verstage-y += program.ld
ifeq ($(CONFIG_RELOCATABLE_MODULES),y)
ramstage-y += rmodule.c
romstage-$(CONFIG_RELOCATABLE_RAMSTAGE) += rmodule.c
RMODULE_LDFLAGS := -z defs -Bsymbolic
# rmodule_link is a function that should be called with:
# (1) the object name to link
# (2) the dependencies
# (3) the heap size of the relocatable module
# (4) the arch for which the rmodule is to be linked
# It will create the necessary Make rules to build an rmodule. The resulting
# rmodule is named $(1).rmod. An illustrative invocation follows the define.
define rmodule_link
$(strip $(1)): $(strip $(2)) $$(COMPILER_RT_rmodules_$(4)) $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) | $$(RMODTOOL)
	$$(LD_rmodules_$(4)) $$(LDFLAGS_rmodules_$(4)) $(RMODULE_LDFLAGS) -T $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) --defsym=__heap_size=$(strip $(3)) -o $$@ --whole-archive --start-group $(filter-out %.ld,$(2)) --end-group
	$$(NM_rmodules_$(4)) -n $$@ > $$(basename $$@).map
endef
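# A hypothetical invocation sketch (object names and heap size are purely
# illustrative, not taken from this tree):
#   $(eval $(call rmodule_link,$(objcbfs)/example.debug,$(obj)/example/example.o,0x4000,x86_32))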
endif
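# Post-process linked ELF images into relocatable modules (.rmod) with rmodtool.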
$(objcbfs)/%.debug.rmod: $(objcbfs)/%.debug | $(RMODTOOL)
	$(RMODTOOL) -i $< -o $@
$(obj)/%.elf.rmod: $(obj)/%.elf | $(RMODTOOL)
	$(RMODTOOL) -i $< -o $@
ifeq ($(CONFIG_RAMSTAGE_LIBHWBASE),y)
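# to-ada-hex rewrites a 0x-prefixed Kconfig value into Ada based-literal form
# (e.g. 0xE0000000 becomes 16#E0000000#); the backslashes keep '#' from being
# taken as a make comment when the value is expanded later.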
to-ada-hex = $(eval $(1) := 16\\\#$(patsubst 0x%,%,$($(1)))\\\#)
$(call to-ada-hex,CONFIG_HWBASE_DEFAULT_MMCONF)
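# Register the 'hw' and 'hw-gen' source classes used by the libhwbase
# Makefiles; their handlers fold those sources (and generated Ada files)
# into the ramstage build.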
$(call add-special-class,hw)
hw-handler = $(eval ramstage-srcs += $$(addprefix $(1),$(2)))
$(call add-special-class,hw-gen)
hw-gen-handler = \
$(eval additional-dirs += $(dir $(2))) \
$(eval ramstage-srcs += $(2)) \
$(eval ramstage-ads-deps += $(2)) \
$(eval ramstage-adb-deps += $(2)) \
$(eval $(2): $(obj)/config.h)
subdirs-y += ../../3rdparty/libhwbase
ramstage-$(CONFIG_HAVE_MONOTONIC_TIMER) += hw-time-timer.adb
endif # CONFIG_RAMSTAGE_LIBHWBASE
romstage-y += spd_bin.c
ifeq ($(CONFIG_GENERIC_SPD_BIN),y)
LIB_SPD_BIN = $(obj)/spd.bin
LIB_SPD_DEPS = $(foreach f, $(SPD_SOURCES), src/mainboard/$(MAINBOARDDIR)/spd/$(f).spd.hex)
# Include spd ROM data
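# Each *.spd.hex source lists whitespace-separated hex bytes; '#' comment
# lines are stripped and every remaining byte is emitted as raw binary via
# printf's octal escape, concatenating all sources into spd.bin.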
$(LIB_SPD_BIN): $(LIB_SPD_DEPS)
	for f in $(LIB_SPD_DEPS); \
	  do for c in $$(cat $$f | grep -v ^#); \
	    do printf $$(printf '\%o' 0x$$c); \
	  done; \
	done > $@
cbfs-files-$(CONFIG_GENERIC_SPD_BIN) += spd.bin
spd.bin-file := $(LIB_SPD_BIN)
spd.bin-type := spd
endif