diff --git a/payloads/libpayload/arch/arm64/cache.c b/payloads/libpayload/arch/arm64/cache.c index 0755c56731..2d42522588 100644 --- a/payloads/libpayload/arch/arm64/cache.c +++ b/payloads/libpayload/arch/arm64/cache.c @@ -119,7 +119,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len) void cache_sync_instructions(void) { - dcache_clean_all(); /* includes trailing DSB (in assembly) */ + uint32_t sctlr = raw_read_sctlr_current(); + if (sctlr & SCTLR_C) + dcache_clean_all(); /* includes trailing DSB (assembly) */ + else if (sctlr & SCTLR_I) + dcache_clean_invalidate_all(); icache_invalidate_all(); /* includes leading DSB and trailing ISB */ } diff --git a/src/arch/arm64/armv8/Makefile.inc b/src/arch/arm64/armv8/Makefile.inc index a7a6b19f9f..21ebf70bfb 100644 --- a/src/arch/arm64/armv8/Makefile.inc +++ b/src/arch/arm64/armv8/Makefile.inc @@ -28,7 +28,6 @@ ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y) bootblock-y += bootblock.S endif bootblock-y += cache.c -bootblock-y += cache_helpers.S bootblock-y += cpu.S bootblock-y += mmu.c @@ -50,7 +49,6 @@ ifeq ($(CONFIG_ARCH_VERSTAGE_ARMV8_64),y) verstage-y += cache.c verstage-y += cpu.S -verstage-y += cache_helpers.S verstage-y += exception.c verstage-generic-ccopts += $(armv8_flags) @@ -63,7 +61,6 @@ endif ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV8_64),y) romstage-y += cache.c -romstage-y += cache_helpers.S romstage-y += cpu.S romstage-y += exception.c romstage-y += mmu.c @@ -80,7 +77,6 @@ endif ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV8_64),y) ramstage-y += cache.c -ramstage-y += cache_helpers.S ramstage-y += cpu.S ramstage-y += exception.c ramstage-y += mmu.c diff --git a/src/arch/arm64/armv8/cache.c b/src/arch/arm64/armv8/cache.c index 4f91de02f2..4b99cd7830 100644 --- a/src/arch/arm64/armv8/cache.c +++ b/src/arch/arm64/armv8/cache.c @@ -34,7 +34,6 @@ #include #include -#include #include #include @@ -121,7 +120,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len) void cache_sync_instructions(void) { - flush_dcache_all(DCCISW); /* includes trailing DSB (in assembly) */ + uint32_t sctlr = raw_read_sctlr_current(); + if (sctlr & SCTLR_C) + dcache_clean_all(); /* includes trailing DSB (assembly) */ + else if (sctlr & SCTLR_I) + dcache_clean_invalidate_all(); icache_invalidate_all(); /* includdes leading DSB and trailing ISB. */ } @@ -131,6 +134,10 @@ void cache_sync_instructions(void) */ void arch_segment_loaded(uintptr_t start, size_t size, int flags) { - dcache_clean_invalidate_by_mva((void *)start, size); + uint32_t sctlr = raw_read_sctlr_current(); + if (sctlr & SCTLR_C) + dcache_clean_by_mva((void *)start, size); + else if (sctlr & SCTLR_I) + dcache_clean_invalidate_by_mva((void *)start, size); icache_invalidate_all(); } diff --git a/src/arch/arm64/armv8/cache_helpers.S b/src/arch/arm64/armv8/cache_helpers.S deleted file mode 100644 index b94bc30781..0000000000 --- a/src/arch/arm64/armv8/cache_helpers.S +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. 
- * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - - /* --------------------------------------------------------------- - * Data cache operations by set/way to the level specified - * - * The main function, do_dcsw_op requires: - * x0: The operation type (0-2), as defined in cache_helpers.h - * x3: The last cache level to operate on - * x9: clidr_el1 - * and will carry out the operation on each data cache from level 0 - * to the level in x3 in sequence - * - * The dcsw_op macro sets up the x3 and x9 parameters based on - * clidr_el1 cache information before invoking the main function - * --------------------------------------------------------------- - */ - -.macro dcsw_op shift, fw, ls - mrs x9, clidr_el1 - ubfx x3, x9, \shift, \fw - lsl x3, x3, \ls - b do_dcsw_op -.endm - -ENTRY(do_dcsw_op) - cbz x3, exit - mov x10, xzr - adr x14, dcsw_loop_table // compute inner loop address - add x14, x14, x0, lsl #5 // inner loop is 8x32-bit instructions - mov x0, x9 - mov w8, #1 -loop1: - add x2, x10, x10, lsr #1 // work out 3x current cache level - lsr x1, x0, x2 // extract cache type bits from clidr - and x1, x1, #7 // mask the bits for current cache only - cmp x1, #2 // see what cache we have at this level - b.lt level_done // nothing to do if no cache or icache - - msr csselr_el1, x10 // select current cache level in csselr - isb // isb to sych the new cssr&csidr - mrs x1, ccsidr_el1 // read the new ccsidr - and x2, x1, #7 // extract the length of the cache lines - add x2, x2, #4 // add 4 (line length offset) - ubfx x4, x1, #3, #10 // maximum way number - clz w5, w4 // bit position of way size increment - lsl w9, w4, w5 // w9 = aligned max way number - lsl w16, w8, w5 // w16 = way number loop decrement - orr w9, w10, w9 // w9 = combine way and cache number - ubfx w6, w1, #13, #15 // w6 = max set number - lsl w17, w8, w2 // w17 = set number loop decrement - dsb sy // barrier before we start this level - br x14 // jump to DC operation specific loop - -level_done: - add x10, x10, #2 // increment cache number - cmp x3, x10 - b.gt loop1 - msr csselr_el1, xzr // select cache level 0 in csselr - dsb sy // barrier to complete final cache operation - isb -exit: - ret -ENDPROC(do_dcsw_op) - -.macro dcsw_loop _op -loop2_\_op: - lsl w7, w6, w2 // w7 = aligned max set number - -loop3_\_op: - orr w11, w9, w7 // combine cache, way and set number 
- dc \_op, x11 - subs w7, w7, w17 // decrement set number - b.ge loop3_\_op - - subs x9, x9, x16 // decrement way number - b.ge loop2_\_op - - b level_done -.endm - -dcsw_loop_table: - dcsw_loop isw - dcsw_loop cisw - dcsw_loop csw - -ENTRY(flush_dcache_louis) - dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT -ENDPROC(flush_dcache_louis) - -ENTRY(flush_dcache_all) - dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT -ENDPROC(flush_dcache_all) diff --git a/src/arch/arm64/armv8/cpu.S b/src/arch/arm64/armv8/cpu.S index 711c338685..1bb8c838ad 100644 --- a/src/arch/arm64/armv8/cpu.S +++ b/src/arch/arm64/armv8/cpu.S @@ -1,8 +1,8 @@ /* - * Based on arch/arm/include/asm/cacheflush.h + * Optimized assembly for low-level CPU operations on ARM64 processors. * - * Copyright (C) 1999-2002 Russell King. - * Copyright (C) 2012 ARM Ltd. + * Copyright (c) 2010 Per Odlund + * Copyright (c) 2014 Google Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -15,11 +15,77 @@ */ #include -#include + +.macro dcache_apply_all crm + dsb sy + mrs x0, clidr_el1 // read CLIDR + and w3, w0, #0x07000000 // narrow to LoC + lsr w3, w3, #23 // left align LoC (low 4 bits) + cbz w3, 5f //done + + mov w10, #0 // w10 = 2 * cache level + mov w8, #1 // w8 = constant 0b1 + +1: //next_level + add w2, w10, w10, lsr #1 // calculate 3 * cache level + lsr w1, w0, w2 // extract 3-bit cache type for this level + and w1, w1, #0x7 // w1 = cache type + cmp w1, #2 // is it data or i&d? + b.lt 4f //skip + msr csselr_el1, x10 // select current cache level + isb // sync change of csselr + mrs x1, ccsidr_el1 // w1 = read ccsidr + and w2, w1, #7 // w2 = log2(linelen_bytes) - 4 + add w2, w2, #4 // w2 = log2(linelen_bytes) + ubfx w4, w1, #3, #10 // w4 = associativity - 1 (also + // max way number) + clz w5, w4 // w5 = 32 - log2(ways) + // (bit position of way in DC) + lsl w9, w4, w5 // w9 = max way number + // (aligned for DC) + lsl w16, w8, w5 // w16 = amount to decrement (way + // number per iteration) +2: //next_way + ubfx w7, w1, #13, #15 // w7 = max set #, right aligned + lsl w7, w7, w2 // w7 = max set #, DC aligned + lsl w17, w8, w2 // w17 = amount to decrement (set + // number per iteration) + +3: //next_set + orr w11, w10, w9 // w11 = combine way # & cache # + orr w11, w11, w7 // ... and set # + dc \crm, x11 // clean and/or invalidate line + subs w7, w7, w17 // decrement set number + b.ge 3b //next_set + subs x9, x9, x16 // decrement way number + b.ge 2b //next_way + +4: //skip + add w10, w10, #2 // increment 2 *cache level + cmp w3, w10 // Went beyond LoC? + b.gt 1b //next_level + +5: //done + dsb sy + isb + ret +.endm + +ENTRY(dcache_invalidate_all) + dcache_apply_all crm=isw +ENDPROC(dcache_invalidate_all) + +ENTRY(dcache_clean_all) + dcache_apply_all crm=csw +ENDPROC(dcache_clean_all) + +ENTRY(dcache_clean_invalidate_all) + dcache_apply_all crm=cisw +ENDPROC(dcache_clean_invalidate_all) /* * Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a - * known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates + * known state regarding caches/SCTLR/PSTATE. Completely invalidates * icache/dcache, disables MMU and dcache (if active), and enables unaligned * accesses, icache and branch prediction (if inactive). Seeds the stack and * initializes SP_EL0. Clobbers R22 and R23. 
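
Both the deleted do_dcsw_op routine and the dcache_apply_all macro that replaces it perform the same architecturally defined walk: read CLIDR_EL1 to find the Level of Coherency, select each data/unified cache level through CSSELR_EL1, read its geometry from CCSIDR_EL1, and issue a DC by set/way operation for every set and way of that level. The C model below is a reference sketch of that walk, not part of this change: the helper names and the use of a compiler built-in are illustrative, and the shipped code stays in assembly so it never touches memory while cache state is in flux.

	#include <stdint.h>

	static inline uint64_t read_clidr_el1(void)
	{
		uint64_t v;
		asm volatile("mrs %0, clidr_el1" : "=r" (v));
		return v;
	}

	static inline uint64_t read_ccsidr_el1(uint64_t csselr)
	{
		uint64_t v;
		/* Select the cache level/type, then read its geometry. */
		asm volatile("msr csselr_el1, %0\n\tisb" : : "r" (csselr));
		asm volatile("mrs %0, ccsidr_el1" : "=r" (v));
		return v;
	}

	/* Illustrative model of dcache_clean_invalidate_all() (DC CISW variant). */
	static void dcache_set_way_walk_model(void)
	{
		uint64_t clidr = read_clidr_el1();
		unsigned int loc = (clidr >> 24) & 0x7;		/* Level of Coherency */

		asm volatile("dsb sy" : : : "memory");
		for (unsigned int level = 0; level < loc; level++) {
			unsigned int ctype = (clidr >> (level * 3)) & 0x7;
			if (ctype < 2)		/* no cache or instruction-only level */
				continue;

			uint64_t ccsidr = read_ccsidr_el1(level << 1);
			unsigned int log2_line = (ccsidr & 0x7) + 4;	/* log2(line bytes) */
			unsigned int ways = ((ccsidr >> 3) & 0x3ff) + 1;
			unsigned int sets = ((ccsidr >> 13) & 0x7fff) + 1;
			/* The way index is left-aligned in the set/way operand. */
			unsigned int way_shift = (ways > 1) ? __builtin_clz(ways - 1) : 0;

			for (unsigned int way = 0; way < ways; way++) {
				for (unsigned int set = 0; set < sets; set++) {
					uint64_t sw = ((uint64_t)way << way_shift) |
						      ((uint64_t)set << log2_line) |
						      ((uint64_t)level << 1);
					asm volatile("dc cisw, %0" : : "r" (sw));
				}
			}
		}
		asm volatile("dsb sy\n\tisb" : : : "memory");
	}

Substituting DC CSW or DC ISW for DC CISW gives the clean-only and invalidate-only variants, which is exactly what the crm argument of dcache_apply_all selects.
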
@@ -41,9 +107,8 @@ ENTRY(arm64_init_cpu) msr sctlr_el3, x22 isb - /* Flush and invalidate dcache */ - mov x0, #DCCISW - bl flush_dcache_all + /* Invalidate dcache */ + bl dcache_invalidate_all /* Deactivate MMU (0), Alignment Check (1) and DCache (2) */ and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2) diff --git a/src/arch/arm64/armv8/mmu.c b/src/arch/arm64/armv8/mmu.c index 9280fc2d7c..55bd703d44 100644 --- a/src/arch/arm64/armv8/mmu.c +++ b/src/arch/arm64/armv8/mmu.c @@ -37,7 +37,6 @@ #include #include #include -#include /* This just caches the next free table slot (okay to do since they fill up from * bottom to top and can never be freed up again). It will reset to its initial @@ -295,7 +294,7 @@ void mmu_enable(void) */ void mmu_disable(void) { - flush_dcache_all(DCCISW); + dcache_clean_invalidate_all(); uint32_t sctlr = raw_read_sctlr_el3(); sctlr &= ~(SCTLR_C | SCTLR_M); raw_write_sctlr_el3(sctlr); diff --git a/src/arch/arm64/include/arch/cache_helpers.h b/src/arch/arm64/include/arch/cache_helpers.h deleted file mode 100644 index 2919d99492..0000000000 --- a/src/arch/arm64/include/arch/cache_helpers.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CACHE_HELPERS_H - -/* CLIDR definitions */ -#define LOUIS_SHIFT 21 -#define LOC_SHIFT 24 -#define CLIDR_FIELD_WIDTH 3 - -/* CSSELR definitions */ -#define LEVEL_SHIFT 1 - -/* D$ set/way op type defines */ -#define DCISW 0x0 -#define DCCISW 0x1 -#define DCCSW 0x2 - -#endif /* __CACHE_HELPERS_H */ diff --git a/src/arch/arm64/include/armv8/arch/cache.h b/src/arch/arm64/include/armv8/arch/cache.h index 84f051d533..3647290866 100644 --- a/src/arch/arm64/include/armv8/arch/cache.h +++ b/src/arch/arm64/include/armv8/arch/cache.h @@ -67,11 +67,10 @@ void dcache_clean_invalidate_by_mva(void const *addr, size_t len); /* dcache invalidate by virtual address to PoC */ void dcache_invalidate_by_mva(void const *addr, size_t len); -/* dcache invalidate all */ -void flush_dcache_all(int op_type); - -/* flush the dcache up to the Level of Unification Inner Shareable */ -void flush_dcache_louis(int op_type); +/* dcache clean and/or invalidate all sets/ways to PoC */ +void dcache_clean_all(void); +void dcache_invalidate_all(void); +void dcache_clean_invalidate_all(void); /* returns number of bytes per cache line */ unsigned int dcache_line_bytes(void);
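
The by-MVA prototypes retained in this header pair with dcache_line_bytes(): the caller-supplied range is walked one cache line at a time with a DC operation to the Point of Coherency. The sketch below shows the usual shape of such a loop for the clean case, assuming a small dc_cvac() inline wrapper (illustrative, not part of this change); the real by-MVA implementations live in cache.c and are not modified by this patch.

	#include <stddef.h>
	#include <stdint.h>
	#include <arch/cache.h>

	/* Illustrative wrapper for DC CVAC (clean data cache line by VA to PoC). */
	static inline void dc_cvac(uint64_t va)
	{
		asm volatile("dc cvac, %0" : : "r" (va) : "memory");
	}

	/* Sketch of a by-MVA clean: align down to a line, then walk the range. */
	static void clean_range_to_poc(void const *addr, size_t len)
	{
		uint64_t line_bytes = dcache_line_bytes();
		uint64_t line = (uint64_t)addr & ~(line_bytes - 1);
		uint64_t end = (uint64_t)addr + len;

		while (line < end) {
			dc_cvac(line);
			line += line_bytes;
		}
		asm volatile("dsb sy" : : : "memory");	/* complete before later accesses */
	}

The invalidate and clean+invalidate variants follow the same pattern with DC IVAC and DC CIVAC respectively.
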