arm64: Align cache maintenance code with libpayload and ARM32

coreboot and libpayload currently use completely different code to
perform a full cache flush on ARM64, with even different function names.
The libpayload code is closely inspired by the ARM32 version, so for the
sake of overall consistency let's sync coreboot to that. Also align a
few other cache management details to work the same way as the
corresponding ARM32 parts (such as only flushing but not invalidating
the data cache after loading a new stage, which may have a small
performance benefit).

Change-Id: I9e05b425eeeaa27a447b37f98c0928fed3f74340
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://review.coreboot.org/19785
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
This commit is contained in:
Julius Werner 2015-04-21 14:32:36 -07:00
parent 3db7653aab
commit baa3e70084
8 changed files with 93 additions and 194 deletions

View File

@ -119,7 +119,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len)
void cache_sync_instructions(void) void cache_sync_instructions(void)
{ {
dcache_clean_all(); /* includes trailing DSB (in assembly) */ uint32_t sctlr = raw_read_sctlr_current();
if (sctlr & SCTLR_C)
dcache_clean_all(); /* includes trailing DSB (assembly) */
else if (sctlr & SCTLR_I)
dcache_clean_invalidate_all();
icache_invalidate_all(); /* includes leading DSB and trailing ISB */ icache_invalidate_all(); /* includes leading DSB and trailing ISB */
} }

View File

@ -28,7 +28,6 @@ ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
bootblock-y += bootblock.S bootblock-y += bootblock.S
endif endif
bootblock-y += cache.c bootblock-y += cache.c
bootblock-y += cache_helpers.S
bootblock-y += cpu.S bootblock-y += cpu.S
bootblock-y += mmu.c bootblock-y += mmu.c
@ -50,7 +49,6 @@ ifeq ($(CONFIG_ARCH_VERSTAGE_ARMV8_64),y)
verstage-y += cache.c verstage-y += cache.c
verstage-y += cpu.S verstage-y += cpu.S
verstage-y += cache_helpers.S
verstage-y += exception.c verstage-y += exception.c
verstage-generic-ccopts += $(armv8_flags) verstage-generic-ccopts += $(armv8_flags)
@ -63,7 +61,6 @@ endif
ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV8_64),y) ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV8_64),y)
romstage-y += cache.c romstage-y += cache.c
romstage-y += cache_helpers.S
romstage-y += cpu.S romstage-y += cpu.S
romstage-y += exception.c romstage-y += exception.c
romstage-y += mmu.c romstage-y += mmu.c
@ -80,7 +77,6 @@ endif
ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV8_64),y) ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV8_64),y)
ramstage-y += cache.c ramstage-y += cache.c
ramstage-y += cache_helpers.S
ramstage-y += cpu.S ramstage-y += cpu.S
ramstage-y += exception.c ramstage-y += exception.c
ramstage-y += mmu.c ramstage-y += mmu.c

View File

@ -34,7 +34,6 @@
#include <stdint.h> #include <stdint.h>
#include <arch/cache.h> #include <arch/cache.h>
#include <arch/cache_helpers.h>
#include <arch/lib_helpers.h> #include <arch/lib_helpers.h>
#include <program_loading.h> #include <program_loading.h>
@ -121,7 +120,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len)
void cache_sync_instructions(void) void cache_sync_instructions(void)
{ {
flush_dcache_all(DCCISW); /* includes trailing DSB (in assembly) */ uint32_t sctlr = raw_read_sctlr_current();
if (sctlr & SCTLR_C)
dcache_clean_all(); /* includes trailing DSB (assembly) */
else if (sctlr & SCTLR_I)
dcache_clean_invalidate_all();
icache_invalidate_all(); /* includdes leading DSB and trailing ISB. */ icache_invalidate_all(); /* includdes leading DSB and trailing ISB. */
} }
@ -131,6 +134,10 @@ void cache_sync_instructions(void)
*/ */
void arch_segment_loaded(uintptr_t start, size_t size, int flags) void arch_segment_loaded(uintptr_t start, size_t size, int flags)
{ {
dcache_clean_invalidate_by_mva((void *)start, size); uint32_t sctlr = raw_read_sctlr_current();
if (sctlr & SCTLR_C)
dcache_clean_by_mva((void *)start, size);
else if (sctlr & SCTLR_I)
dcache_clean_invalidate_by_mva((void *)start, size);
icache_invalidate_all(); icache_invalidate_all();
} }

View File

@ -1,124 +0,0 @@
/*
* Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of ARM nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <arch/asm.h>
#include <arch/cache_helpers.h>
/* ---------------------------------------------------------------
* Data cache operations by set/way to the level specified
*
* The main function, do_dcsw_op requires:
* x0: The operation type (0-2), as defined in cache_helpers.h
* x3: The last cache level to operate on
* x9: clidr_el1
* and will carry out the operation on each data cache from level 0
* to the level in x3 in sequence
*
* The dcsw_op macro sets up the x3 and x9 parameters based on
* clidr_el1 cache information before invoking the main function
* ---------------------------------------------------------------
*/
.macro dcsw_op shift, fw, ls
mrs x9, clidr_el1
ubfx x3, x9, \shift, \fw
lsl x3, x3, \ls
b do_dcsw_op
.endm
ENTRY(do_dcsw_op)
cbz x3, exit
mov x10, xzr
adr x14, dcsw_loop_table // compute inner loop address
add x14, x14, x0, lsl #5 // inner loop is 8x32-bit instructions
mov x0, x9
mov w8, #1
loop1:
add x2, x10, x10, lsr #1 // work out 3x current cache level
lsr x1, x0, x2 // extract cache type bits from clidr
and x1, x1, #7 // mask the bits for current cache only
cmp x1, #2 // see what cache we have at this level
b.lt level_done // nothing to do if no cache or icache
msr csselr_el1, x10 // select current cache level in csselr
isb // isb to sych the new cssr&csidr
mrs x1, ccsidr_el1 // read the new ccsidr
and x2, x1, #7 // extract the length of the cache lines
add x2, x2, #4 // add 4 (line length offset)
ubfx x4, x1, #3, #10 // maximum way number
clz w5, w4 // bit position of way size increment
lsl w9, w4, w5 // w9 = aligned max way number
lsl w16, w8, w5 // w16 = way number loop decrement
orr w9, w10, w9 // w9 = combine way and cache number
ubfx w6, w1, #13, #15 // w6 = max set number
lsl w17, w8, w2 // w17 = set number loop decrement
dsb sy // barrier before we start this level
br x14 // jump to DC operation specific loop
level_done:
add x10, x10, #2 // increment cache number
cmp x3, x10
b.gt loop1
msr csselr_el1, xzr // select cache level 0 in csselr
dsb sy // barrier to complete final cache operation
isb
exit:
ret
ENDPROC(do_dcsw_op)
.macro dcsw_loop _op
loop2_\_op:
lsl w7, w6, w2 // w7 = aligned max set number
loop3_\_op:
orr w11, w9, w7 // combine cache, way and set number
dc \_op, x11
subs w7, w7, w17 // decrement set number
b.ge loop3_\_op
subs x9, x9, x16 // decrement way number
b.ge loop2_\_op
b level_done
.endm
dcsw_loop_table:
dcsw_loop isw
dcsw_loop cisw
dcsw_loop csw
ENTRY(flush_dcache_louis)
dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
ENDPROC(flush_dcache_louis)
ENTRY(flush_dcache_all)
dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
ENDPROC(flush_dcache_all)

View File

@ -1,8 +1,8 @@
/* /*
* Based on arch/arm/include/asm/cacheflush.h * Optimized assembly for low-level CPU operations on ARM64 processors.
* *
* Copyright (C) 1999-2002 Russell King. * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
* Copyright (C) 2012 ARM Ltd. * Copyright (c) 2014 Google Inc.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as * it under the terms of the GNU General Public License version 2 as
@ -15,11 +15,77 @@
*/ */
#include <arch/asm.h> #include <arch/asm.h>
#include <arch/cache_helpers.h>
.macro dcache_apply_all crm
dsb sy
mrs x0, clidr_el1 // read CLIDR
and w3, w0, #0x07000000 // narrow to LoC
lsr w3, w3, #23 // left align LoC (low 4 bits)
cbz w3, 5f //done
mov w10, #0 // w10 = 2 * cache level
mov w8, #1 // w8 = constant 0b1
1: //next_level
add w2, w10, w10, lsr #1 // calculate 3 * cache level
lsr w1, w0, w2 // extract 3-bit cache type for this level
and w1, w1, #0x7 // w1 = cache type
cmp w1, #2 // is it data or i&d?
b.lt 4f //skip
msr csselr_el1, x10 // select current cache level
isb // sync change of csselr
mrs x1, ccsidr_el1 // w1 = read ccsidr
and w2, w1, #7 // w2 = log2(linelen_bytes) - 4
add w2, w2, #4 // w2 = log2(linelen_bytes)
ubfx w4, w1, #3, #10 // w4 = associativity - 1 (also
// max way number)
clz w5, w4 // w5 = 32 - log2(ways)
// (bit position of way in DC)
lsl w9, w4, w5 // w9 = max way number
// (aligned for DC)
lsl w16, w8, w5 // w16 = amount to decrement (way
// number per iteration)
2: //next_way
ubfx w7, w1, #13, #15 // w7 = max set #, right aligned
lsl w7, w7, w2 // w7 = max set #, DC aligned
lsl w17, w8, w2 // w17 = amount to decrement (set
// number per iteration)
3: //next_set
orr w11, w10, w9 // w11 = combine way # & cache #
orr w11, w11, w7 // ... and set #
dc \crm, x11 // clean and/or invalidate line
subs w7, w7, w17 // decrement set number
b.ge 3b //next_set
subs x9, x9, x16 // decrement way number
b.ge 2b //next_way
4: //skip
add w10, w10, #2 // increment 2 *cache level
cmp w3, w10 // Went beyond LoC?
b.gt 1b //next_level
5: //done
dsb sy
isb
ret
.endm
ENTRY(dcache_invalidate_all)
dcache_apply_all crm=isw
ENDPROC(dcache_invalidate_all)
ENTRY(dcache_clean_all)
dcache_apply_all crm=csw
ENDPROC(dcache_clean_all)
ENTRY(dcache_clean_invalidate_all)
dcache_apply_all crm=cisw
ENDPROC(dcache_clean_invalidate_all)
/* /*
* Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a * Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
* known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates * known state regarding caches/SCTLR/PSTATE. Completely invalidates
* icache/dcache, disables MMU and dcache (if active), and enables unaligned * icache/dcache, disables MMU and dcache (if active), and enables unaligned
* accesses, icache and branch prediction (if inactive). Seeds the stack and * accesses, icache and branch prediction (if inactive). Seeds the stack and
* initializes SP_EL0. Clobbers R22 and R23. * initializes SP_EL0. Clobbers R22 and R23.
@ -41,9 +107,8 @@ ENTRY(arm64_init_cpu)
msr sctlr_el3, x22 msr sctlr_el3, x22
isb isb
/* Flush and invalidate dcache */ /* Invalidate dcache */
mov x0, #DCCISW bl dcache_invalidate_all
bl flush_dcache_all
/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */ /* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2) and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)

View File

@ -37,7 +37,6 @@
#include <arch/mmu.h> #include <arch/mmu.h>
#include <arch/lib_helpers.h> #include <arch/lib_helpers.h>
#include <arch/cache.h> #include <arch/cache.h>
#include <arch/cache_helpers.h>
/* This just caches the next free table slot (okay to do since they fill up from /* This just caches the next free table slot (okay to do since they fill up from
* bottom to top and can never be freed up again). It will reset to its initial * bottom to top and can never be freed up again). It will reset to its initial
@ -295,7 +294,7 @@ void mmu_enable(void)
*/ */
void mmu_disable(void) void mmu_disable(void)
{ {
flush_dcache_all(DCCISW); dcache_clean_invalidate_all();
uint32_t sctlr = raw_read_sctlr_el3(); uint32_t sctlr = raw_read_sctlr_el3();
sctlr &= ~(SCTLR_C | SCTLR_M); sctlr &= ~(SCTLR_C | SCTLR_M);
raw_write_sctlr_el3(sctlr); raw_write_sctlr_el3(sctlr);

View File

@ -1,47 +0,0 @@
/*
* Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of ARM nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CACHE_HELPERS_H
/* CLIDR definitions */
#define LOUIS_SHIFT 21
#define LOC_SHIFT 24
#define CLIDR_FIELD_WIDTH 3
/* CSSELR definitions */
#define LEVEL_SHIFT 1
/* D$ set/way op type defines */
#define DCISW 0x0
#define DCCISW 0x1
#define DCCSW 0x2
#endif /* __CACHE_HELPERS_H */

View File

@ -67,11 +67,10 @@ void dcache_clean_invalidate_by_mva(void const *addr, size_t len);
/* dcache invalidate by virtual address to PoC */ /* dcache invalidate by virtual address to PoC */
void dcache_invalidate_by_mva(void const *addr, size_t len); void dcache_invalidate_by_mva(void const *addr, size_t len);
/* dcache invalidate all */ /* dcache clean and/or invalidate all sets/ways to PoC */
void flush_dcache_all(int op_type); void dcache_clean_all(void);
void dcache_invalidate_all(void);
/* flush the dcache up to the Level of Unification Inner Shareable */ void dcache_clean_invalidate_all(void);
void flush_dcache_louis(int op_type);
/* returns number of bytes per cache line */ /* returns number of bytes per cache line */
unsigned int dcache_line_bytes(void); unsigned int dcache_line_bytes(void);