From f574a327eed82ce00ea94d3f904f3dd8001d240c Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Thu, 27 Feb 2014 14:56:39 -0800 Subject: [PATCH] ARM: Use LPAE for Virtual Address Translation This change introduces LPAE for virtual address translation. To enable it, set ARM_LPAE. Boot slows down about 4ms on Tegra124 with LPAE enabled. TEST=Booted nyan with and without LPAE. Built nyan_big and daisy. BUG=None BRANCH=none Signed-off-by: Daisuke Nojiri Tested-by: Daisuke Nojiri Original-Change-Id: I74aa729b6fe6d243f57123dc792302359c661cad Original-Reviewed-on: https://chromium-review.googlesource.com/187862 Original-Reviewed-by: Julius Werner Original-Commit-Queue: Daisuke Nojiri Original-Tested-by: Daisuke Nojiri (cherry picked from commit 6d8c8b2bbdc70555076081eb3bfaabde7b4a398f) Signed-off-by: Marc Jones Change-Id: I8980375c14758af35f7d5ec5244be963e5462d8a Reviewed-on: http://review.coreboot.org/7749 Reviewed-by: Stefan Reinauer Tested-by: build bot (Jenkins) --- src/arch/arm/Kconfig | 6 +- src/arch/arm/armv7/mmu.c | 222 +++++++++++++++++------- src/arch/arm/include/armv7/arch/cache.h | 24 +++ src/soc/nvidia/tegra124/Kconfig | 17 +- 4 files changed, 195 insertions(+), 74 deletions(-) diff --git a/src/arch/arm/Kconfig b/src/arch/arm/Kconfig index 57849a64fc..e946f599f3 100644 --- a/src/arch/arm/Kconfig +++ b/src/arch/arm/Kconfig @@ -30,4 +30,8 @@ config CPU_HAS_BOOTBLOCK_INIT config MAINBOARD_HAS_BOOTBLOCK_INIT bool - default n \ No newline at end of file + default n + +config ARM_LPAE + bool "Enable LPAE" + default n diff --git a/src/arch/arm/armv7/mmu.c b/src/arch/arm/armv7/mmu.c index d71003057a..1b5957c14b 100644 --- a/src/arch/arm/armv7/mmu.c +++ b/src/arch/arm/armv7/mmu.c @@ -27,6 +27,7 @@ * SUCH DAMAGE. 
*/ +#include #include #include #include @@ -37,32 +38,29 @@ #include #include -static void *const ttb_buff = (void *)CONFIG_TTB_BUFFER; +#if CONFIG_ARM_LPAE +/* See B3.6.2 of ARMv7 Architecture Reference Manual */ +/* TODO: Utilize the contiguous hint flag */ +#define ATTR_BASE (\ + 0ULL << 54 | /* XN. 0:Not restricted */ \ + 0ULL << 53 | /* PXN. 0:Not restricted */ \ + 1 << 10 | /* AF. 1:Accessed. This is to prevent access \ + * fault when accessed for the first time */ \ + 0 << 6 | /* AP[2:1]. 0b00:full access from PL1 */ \ + 0 << 5 | /* NS. 0:Output address is in Secure space */ \ + 0 << 1 | /* block/table. 0:block entry */ \ + 1 << 0 /* validity. 1:valid */ \ + ) +#define ATTR_NC (ATTR_BASE | (MAIR_INDX_NC << 2)) +#define ATTR_WT (ATTR_BASE | (MAIR_INDX_WT << 2)) +#define ATTR_WB (ATTR_BASE | (MAIR_INDX_WB << 2)) -void mmu_disable_range(unsigned long start_mb, unsigned long size_mb) -{ - unsigned int i; - uint32_t *ttb_entry = ttb_buff; - printk(BIOS_DEBUG, "Disabling: 0x%08lx:0x%08lx\n", - start_mb*MiB, start_mb*MiB + size_mb*MiB - 1); - - for (i = start_mb; i < start_mb + size_mb; i++) - writel(0, &ttb_entry[i]); - - for (i = start_mb; i < start_mb + size_mb; i++) { - dccmvac((uintptr_t)&ttb_entry[i]); - tlbimvaa(i*MiB); - } -} - -void mmu_config_range(unsigned long start_mb, unsigned long size_mb, - enum dcache_policy policy) -{ - unsigned int i; - uint32_t attr; - uint32_t *ttb_entry = ttb_buff; - const char *str = NULL; +#define BLOCK_SHIFT 21 +typedef uint64_t pgd_t; +typedef uint64_t pmd_t; +static const unsigned int denom = 2; +#else /* CONFIG_ARM_LPAE */ /* * Section entry bits: * 31:20 - section base address @@ -79,19 +77,83 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb, * 2 - B, 1 for bufferable * 1: 0 - 0b10 to indicate section entry */ +#define ATTR_BASE ((3 << 10) | 0x2) +#define ATTR_NC (ATTR_BASE | (1 << 4)) +#define ATTR_WT (ATTR_BASE | (1 << 3)) +#define ATTR_WB (ATTR_BASE | (1 << 3) | (1 << 2)) + +#define BLOCK_SHIFT 
20 + +typedef uint32_t pgd_t; +typedef uint32_t pmd_t; +static const unsigned int denom = 1; +#endif /* CONFIG_ARM_LPAE */ + +static pmd_t *const ttb_buff = (pmd_t *)CONFIG_TTB_BUFFER; + +/* + * mask/shift/size for pages and blocks + */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK ~((1UL << PAGE_SHIFT) - 1) +#define BLOCK_SIZE (1UL << BLOCK_SHIFT) + +/* + * MAIR Index + */ +#define MAIR_INDX_NC 0 +#define MAIR_INDX_WT 1 +#define MAIR_INDX_WB 2 + +static void mmu_flush_page_table_entry_range( + unsigned long start_mb, unsigned long size_mb) +{ + int i; + + /* Flush the page table entries from the dcache. */ + for (i = start_mb/denom; i*denom < start_mb + size_mb; i++) + dccmvac((uintptr_t)&ttb_buff[i]); + dsb(); + /* Invalidate the TLB entries. */ + for (i = start_mb/denom; i*denom < start_mb + size_mb; i++) + tlbimvaa(i*denom*MiB); + dsb(); + isb(); +} + +void mmu_disable_range(unsigned long start_mb, unsigned long size_mb) +{ + int i; + + printk(BIOS_DEBUG, "Disabling: [0x%08lx:0x%08lx)\n", + start_mb*MiB, start_mb*MiB + size_mb*MiB); + + for (i = start_mb/denom; i*denom < start_mb + size_mb; i++) + ttb_buff[i] = 0; + + mmu_flush_page_table_entry_range(start_mb, size_mb); +} + +void mmu_config_range(unsigned long start_mb, unsigned long size_mb, + enum dcache_policy policy) +{ + const char *str = NULL; + pmd_t attr; + int i; switch(policy) { case DCACHE_OFF: /* XN set to avoid prefetches to uncached/unbuffered regions */ - attr = (0x3 << 10) | (1 << 4) | 0x2; + attr = ATTR_NC; str = "off"; break; case DCACHE_WRITEBACK: - attr = (0x3 << 10) | (1 << 3) | (1 << 2) | 0x2; + attr = ATTR_WB; str = "writeback"; break; case DCACHE_WRITETHROUGH: - attr = (0x3 << 10) | (1 << 3) | 0x2; + attr = ATTR_WT; str = "writethrough"; break; default: @@ -99,52 +161,86 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb, return; } - printk(BIOS_DEBUG, "Setting dcache policy: 0x%08lx:0x%08lx [%s]\n", - start_mb << 20, ((start_mb + 
size_mb) << 20) - 1, str); + printk(BIOS_DEBUG, "Setting dcache policy: [0x%08lx:0x%08lx) [%s]\n", + start_mb << 20, ((start_mb + size_mb) << 20), str); /* Write out page table entries. */ - for (i = start_mb; i < start_mb + size_mb; i++) - writel((i << 20) | attr, &ttb_entry[i]); + for (i = start_mb/denom; i*denom < start_mb + size_mb; i++) + ttb_buff[i] = ((pmd_t)i << BLOCK_SHIFT) | attr; - /* Flush the page table entries from the dcache. */ - for (i = start_mb; i < start_mb + size_mb; i++) - dccmvac((uintptr_t)&ttb_entry[i]); - - dsb(); - - /* Invalidate the TLB entries. */ - for (i = start_mb; i < start_mb + size_mb; i++) - tlbimvaa(i*MiB); - - dsb(); - isb(); + mmu_flush_page_table_entry_range(start_mb, size_mb); } +/* + * For coreboot's purposes, we will create a simple identity map. + * + * If LPAE is disabled, we will create a L1 page + * table in RAM with 1MB section translation entries over the 4GB address space. + * (ref: section 10.2 and example 15-4 in Cortex-A series programmer's guide) + * + * If LPAE is enabled, we do two level translation with one L1 table with 4 + * entries, each covering a 1GB space, and four L2 tables with 512 entries, each + * covering a 2MB space. + */ void mmu_init(void) { - /* - * For coreboot's purposes, we will create a simple L1 page table - * in RAM with 1MB section translation entries over the 4GB address - * space. 
- * (ref: section 10.2 and example 15-4 in Cortex-A series - * programmer's guide) - */ - printk(BIOS_DEBUG, "Translation table is @ %p\n", ttb_buff); + if (CONFIG_ARM_LPAE) { + pgd_t *const pgd_buff = (pgd_t*)(CONFIG_TTB_BUFFER + 16*KiB); + pmd_t *pmd = ttb_buff; + int i; + + printk(BIOS_DEBUG, "LPAE Translation tables are @ %p\n", + ttb_buff); + ASSERT((read_mmfr0() & 0xf) >= 5); + + /* + * Set MAIR + * See B4.1.104 of ARMv7 Architecture Reference Manual + */ + write_mair0( + 0x00 << (MAIR_INDX_NC*8) | /* Strongly-ordered, + * Non-Cacheable */ + 0xaa << (MAIR_INDX_WT*8) | /* Write-Thru, + * Read-Allocate */ + 0xff << (MAIR_INDX_WB*8) /* Write-Back, + * Read/Write-Allocate */ + ); + + /* + * Set up L1 table + * Once set here, L1 table won't be modified by coreboot. + * See B3.6.1 of ARMv7 Architecture Reference Manual + */ + for (i = 0; i < 4; i++) { + pgd_buff[i] = ((uint32_t)pmd & PAGE_MASK) | + 3; /* 0b11: valid table entry */ + pmd += BLOCK_SIZE / PAGE_SIZE; + } + + /* + * Set TTBR0 + */ + write_ttbr0((uintptr_t)pgd_buff); + } else { + printk(BIOS_DEBUG, "Translation table is @ %p\n", ttb_buff); + + /* + * Translation table base 0 address is in bits 31:14-N, where N + * is given by bits 2:0 in TTBCR (which we set to 0). All lower + * bits in this register should be zero for coreboot. + */ + write_ttbr0((uintptr_t)ttb_buff); + } /* - * Disable TTBR1 by setting TTBCR.N to 0b000, which means the TTBR0 - * table size is 16KB and has indices VA[31:20]. - * - * ref: Arch Ref. Manual for ARMv7-A, B3.5.4, + * Set TTBCR + * See B4.1.153 of ARMv7 Architecture Reference Manual + * See B3.5.4 and B3.6.4 for how TTBR0 or TTBR1 is selected. */ - write_ttbcr(read_ttbcr() & ~0x3); - - /* - * Translation table base 0 address is in bits 31:14-N, where N is given - * by bits 2:0 in TTBCR (which we set to 0). All lower bits in this - * register should be zero for coreboot. - */ - write_ttbr0((uintptr_t)ttb_buff); + write_ttbcr( + CONFIG_ARM_LPAE << 31 | /* EAE. 
1:Enable LPAE */ + 0 << 16 | 0 << 0 /* Use TTBR0 for all addresses */ + ); /* disable domain-level checking of permissions */ write_dacr(~0); diff --git a/src/arch/arm/include/armv7/arch/cache.h b/src/arch/arm/include/armv7/arch/cache.h index 470eb55108..dde2c08c1d 100644 --- a/src/arch/arm/include/armv7/arch/cache.h +++ b/src/arch/arm/include/armv7/arch/cache.h @@ -111,10 +111,34 @@ static inline void write_dacr(uint32_t val) asm volatile ("mcr p15, 0, %0, c3, c0, 0" : : "r" (val)); } +/* read memory model feature register 0 (MMFR0) */ +static inline uint32_t read_mmfr0(void) +{ + uint32_t mmfr; + asm volatile ("mrc p15, 0, %0, c0, c1, 4" : "=r" (mmfr)); + return mmfr; +} +/* read MAIR0 (memory attribute indirection register 0) */ +static inline uint32_t read_mair0(void) +{ + uint32_t mair; + asm volatile ("mrc p15, 0, %0, c10, c2, 0" : "=r" (mair)); + return mair; +} +/* write MAIR0 (memory attribute indirection register 0) */ +static inline void write_mair0(uint32_t val) +{ + asm volatile ("mcr p15, 0, %0, c10, c2, 0" : : "r" (val)); +} /* write translation table base register 0 (TTBR0) */ static inline void write_ttbr0(uint32_t val) { +#if CONFIG_ARM_LPAE + asm volatile ("mcrr p15, 0, %[val], %[zero], c2" : : + [val] "r" (val), [zero] "r" (0)); +#else asm volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r" (val) : "memory"); +#endif } /* read translation table base control register (TTBCR) */ diff --git a/src/soc/nvidia/tegra124/Kconfig b/src/soc/nvidia/tegra124/Kconfig index 7862dd5b7b..195261e2e4 100644 --- a/src/soc/nvidia/tegra124/Kconfig +++ b/src/soc/nvidia/tegra124/Kconfig @@ -1,4 +1,6 @@ config SOC_NVIDIA_TEGRA124 + bool + default n select ARCH_BOOTBLOCK_ARMV4 select ARCH_ROMSTAGE_ARMV7 select ARCH_RAMSTAGE_ARMV7 @@ -6,8 +8,8 @@ config SOC_NVIDIA_TEGRA124 select BOOTBLOCK_CONSOLE select DYNAMIC_CBMEM select ARM_BOOTBLOCK_CUSTOM - bool - default n + select ARM_LPAE + if SOC_NVIDIA_TEGRA124 @@ -31,8 +33,7 @@ config BOOTBLOCK_CPU_INIT # handoff that area may be 
reclaimed for other uses, e.g. CBFS cache.) # # 0x4000_0000 TTB (16K+32B). 32B is for L1 table of LPAE. -# 0x4000_4020 CBMEM console area (8K-32B) -# 0x4000_6000 CBFS mapping cache (88K) +# 0x4000_4020 CBFS mapping cache (96K-32B) # 0x4001_C000 Stack (16KB... don't reduce without comparing LZMA scratchpad!). # 0x4002_0000 Bootblock (max 48KB). # 0x4002_C000 ROM stage (max 80KB). @@ -86,15 +87,11 @@ config TTB_BUFFER config CBFS_CACHE_ADDRESS hex "memory address to put CBFS cache data" - default 0x40006000 + default 0x40004020 config CBFS_CACHE_SIZE hex "size of CBFS cache data" - default 0x00016000 - -config CBMEM_CONSOLE_PRERAM_BASE - hex "memory address of the CBMEM console buffer" - default 0x40004020 + default 0x00017fe0 config TEGRA124_MODEL_TD570D bool "TD570D"