ARM: Use LPAE for Virtual Address Translation

This change introduces LPAE (Large Physical Address Extension) for virtual
address translation. To enable it, set ARM_LPAE. With LPAE enabled, boot time
on Tegra124 increases by about 4ms.

TEST=Booted nyan with and without LPAE. Built nyan_big and daisy.
BUG=None
BRANCH=none
Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Tested-by: Daisuke Nojiri <dnojiri@google.com>

Original-Change-Id: I74aa729b6fe6d243f57123dc792302359c661cad
Original-Reviewed-on: https://chromium-review.googlesource.com/187862
Original-Reviewed-by: Julius Werner <jwerner@chromium.org>
Original-Commit-Queue: Daisuke Nojiri <dnojiri@chromium.org>
Original-Tested-by: Daisuke Nojiri <dnojiri@chromium.org>
(cherry picked from commit 6d8c8b2bbdc70555076081eb3bfaabde7b4a398f)
Signed-off-by: Marc Jones <marc.jones@se-eng.com>

Change-Id: I8980375c14758af35f7d5ec5244be963e5462d8a
Reviewed-on: http://review.coreboot.org/7749
Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
Tested-by: build bot (Jenkins)
This commit is contained in:
Daisuke Nojiri 2014-02-27 14:56:39 -08:00 committed by Marc Jones
parent 032c843817
commit f574a327ee
4 changed files with 195 additions and 74 deletions

View File

@ -30,4 +30,8 @@ config CPU_HAS_BOOTBLOCK_INIT
config MAINBOARD_HAS_BOOTBLOCK_INIT
bool
default n
default n
# Build the ARM MMU code against CONFIG_ARM_LPAE=1, which switches it from the
# default translation table format to the LPAE one. Off by default; a platform
# may turn it on with "select ARM_LPAE".
config ARM_LPAE
bool "Enable LPAE"
default n

View File

@ -27,6 +27,7 @@
* SUCH DAMAGE.
*/
#include <assert.h>
#include <config.h>
#include <stdlib.h>
#include <stdint.h>
@ -37,32 +38,29 @@
#include <arch/cache.h>
#include <arch/io.h>
static void *const ttb_buff = (void *)CONFIG_TTB_BUFFER;
#if CONFIG_ARM_LPAE
/* See B3.6.2 of ARMv7 Architecture Reference Manual */
/* TODO: Utilize the contiguous hint flag */
/*
 * Attribute bits shared by every LPAE block descriptor written by this file.
 * The memory type is not encoded here; ATTR_NC/ATTR_WT/ATTR_WB below OR in
 * the MAIR index that selects it.
 */
#define ATTR_BASE (\
0ULL << 54 | /* XN. 0:Not restricted */ \
0ULL << 53 | /* PXN. 0:Not restricted */ \
1 << 10 | /* AF. 1:Accessed. This is to prevent access \
* fault when accessed for the first time */ \
0 << 6 | /* AP[2:1]. 0b00:full access from PL1 */ \
0 << 5 | /* NS. 0:Output address is in Secure space */ \
0 << 1 | /* block/table. 0:block entry */ \
1 << 0 /* validity. 1:valid */ \
)
/* Per-policy attributes: ATTR_BASE plus the MAIR index placed at bit 2 and up. */
#define ATTR_NC (ATTR_BASE | (MAIR_INDX_NC << 2))
#define ATTR_WT (ATTR_BASE | (MAIR_INDX_WT << 2))
#define ATTR_WB (ATTR_BASE | (MAIR_INDX_WB << 2))
void mmu_disable_range(unsigned long start_mb, unsigned long size_mb)
{
unsigned int i;
uint32_t *ttb_entry = ttb_buff;
printk(BIOS_DEBUG, "Disabling: 0x%08lx:0x%08lx\n",
start_mb*MiB, start_mb*MiB + size_mb*MiB - 1);
for (i = start_mb; i < start_mb + size_mb; i++)
writel(0, &ttb_entry[i]);
for (i = start_mb; i < start_mb + size_mb; i++) {
dccmvac((uintptr_t)&ttb_entry[i]);
tlbimvaa(i*MiB);
}
}
void mmu_config_range(unsigned long start_mb, unsigned long size_mb,
enum dcache_policy policy)
{
unsigned int i;
uint32_t attr;
uint32_t *ttb_entry = ttb_buff;
const char *str = NULL;
/* LPAE: one ttb_buff entry maps a block of 1 << 21 bytes (2MiB). */
#define BLOCK_SHIFT 21
/* LPAE descriptors are 64 bits wide, for the L1 (pgd) and L2 (pmd) tables. */
typedef uint64_t pgd_t;
typedef uint64_t pmd_t;
/* MiB covered by one ttb_buff entry; used to turn MiB offsets into indices. */
static const unsigned int denom = 2;
#else /* CONFIG_ARM_LPAE */
/*
* Section entry bits:
* 31:20 - section base address
@ -79,19 +77,83 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb,
* 2 - B, 1 for bufferable
* 1: 0 - 0b10 to indicate section entry
*/
/* Bits 11:10 = 0b11 (access permission) and bits 1:0 = 0b10 (section entry). */
#define ATTR_BASE ((3 << 10) | 0x2)
/* Uncached: bit 4 (XN) set, C and B clear. */
#define ATTR_NC (ATTR_BASE | (1 << 4))
/* Write-through: bit 3 (C) set, bit 2 (B) clear. */
#define ATTR_WT (ATTR_BASE | (1 << 3))
/* Write-back: bit 3 (C) and bit 2 (B) both set. */
#define ATTR_WB (ATTR_BASE | (1 << 3) | (1 << 2))
/* Without LPAE one ttb_buff entry maps a section of 1 << 20 bytes (1MiB). */
#define BLOCK_SHIFT 20
/* Without LPAE, table entries are 32 bits wide. */
typedef uint32_t pgd_t;
typedef uint32_t pmd_t;
/* MiB covered by one ttb_buff entry; used to turn MiB offsets into indices. */
static const unsigned int denom = 1;
#endif /* CONFIG_ARM_LPAE */
static pmd_t *const ttb_buff = (pmd_t *)CONFIG_TTB_BUFFER;
/*
 * mask/shift/size for pages and blocks
 *
 * PAGE_* describe a 4KiB page (1 << 12). BLOCK_SIZE is derived from
 * BLOCK_SHIFT, which is defined earlier in this file. Every expansion is
 * fully parenthesized so the macros can be dropped into any expression.
 */
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
#define BLOCK_SIZE (1UL << BLOCK_SHIFT)
/*
 * MAIR Index
 *
 * Attribute slot numbers: mmu_init() programs one memory type per slot into
 * MAIR0 and the LPAE ATTR_* macros place the slot number in each descriptor.
 */
enum {
	MAIR_INDX_NC = 0,
	MAIR_INDX_WT = 1,
	MAIR_INDX_WB = 2
};
/*
 * Make updated translation table entries visible to the MMU.
 *
 * start_mb/size_mb give the affected address range in MiB. Every ttb_buff
 * entry overlapping that range is flushed from the dcache, then the TLB
 * entry for the address each of those table entries maps is invalidated.
 */
static void mmu_flush_page_table_entry_range(
		unsigned long start_mb, unsigned long size_mb)
{
	const unsigned long end_mb = start_mb + size_mb;
	const int first = start_mb / denom;
	int idx;

	/* Pass 1: flush the page table entries from the dcache. */
	idx = first;
	while (idx * denom < end_mb) {
		dccmvac((uintptr_t)&ttb_buff[idx]);
		idx++;
	}
	dsb();

	/* Pass 2: invalidate the matching TLB entries. */
	idx = first;
	while (idx * denom < end_mb) {
		tlbimvaa(idx * denom * MiB);
		idx++;
	}
	dsb();
	isb();
}
/*
 * Remove the mapping for [start_mb, start_mb + size_mb), both given in MiB.
 *
 * Every ttb_buff entry overlapping the range is overwritten with 0, after
 * which the entries are flushed and the corresponding TLB entries dropped.
 */
void mmu_disable_range(unsigned long start_mb, unsigned long size_mb)
{
	const unsigned long end_mb = start_mb + size_mb;
	int idx = start_mb / denom;

	printk(BIOS_DEBUG, "Disabling: [0x%08lx:0x%08lx)\n",
	       start_mb*MiB, start_mb*MiB + size_mb*MiB);

	/* Clear each table entry that covers part of the range. */
	while (idx * denom < end_mb) {
		ttb_buff[idx] = 0;
		idx++;
	}

	mmu_flush_page_table_entry_range(start_mb, size_mb);
}
void mmu_config_range(unsigned long start_mb, unsigned long size_mb,
enum dcache_policy policy)
{
const char *str = NULL;
pmd_t attr;
int i;
switch(policy) {
case DCACHE_OFF:
/* XN set to avoid prefetches to uncached/unbuffered regions */
attr = (0x3 << 10) | (1 << 4) | 0x2;
attr = ATTR_NC;
str = "off";
break;
case DCACHE_WRITEBACK:
attr = (0x3 << 10) | (1 << 3) | (1 << 2) | 0x2;
attr = ATTR_WB;
str = "writeback";
break;
case DCACHE_WRITETHROUGH:
attr = (0x3 << 10) | (1 << 3) | 0x2;
attr = ATTR_WT;
str = "writethrough";
break;
default:
@ -99,52 +161,86 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb,
return;
}
printk(BIOS_DEBUG, "Setting dcache policy: 0x%08lx:0x%08lx [%s]\n",
start_mb << 20, ((start_mb + size_mb) << 20) - 1, str);
printk(BIOS_DEBUG, "Setting dcache policy: [0x%08lx:0x%08lx) [%s]\n",
start_mb << 20, ((start_mb + size_mb) << 20), str);
/* Write out page table entries. */
for (i = start_mb; i < start_mb + size_mb; i++)
writel((i << 20) | attr, &ttb_entry[i]);
for (i = start_mb/denom; i*denom < start_mb + size_mb; i++)
ttb_buff[i] = ((pmd_t)i << BLOCK_SHIFT) | attr;
/* Flush the page table entries from the dcache. */
for (i = start_mb; i < start_mb + size_mb; i++)
dccmvac((uintptr_t)&ttb_entry[i]);
dsb();
/* Invalidate the TLB entries. */
for (i = start_mb; i < start_mb + size_mb; i++)
tlbimvaa(i*MiB);
dsb();
isb();
mmu_flush_page_table_entry_range(start_mb, size_mb);
}
/*
* For coreboot's purposes, we will create a simple identity map.
*
* If LPAE is disabled, we will create an L1 page
* table in RAM with 1MB section translation entries over the 4GB address space.
* (ref: section 10.2 and example 15-4 in Cortex-A series programmer's guide)
*
* If LPAE is enabled, we do two level translation with one L1 table with 4
* entries, each covering a 1GB space, and four L2 tables with 512 entries, each
* covering a 2MB space.
*/
void mmu_init(void)
{
/*
* For coreboot's purposes, we will create a simple L1 page table
* in RAM with 1MB section translation entries over the 4GB address
* space.
* (ref: section 10.2 and example 15-4 in Cortex-A series
* programmer's guide)
*/
printk(BIOS_DEBUG, "Translation table is @ %p\n", ttb_buff);
if (CONFIG_ARM_LPAE) {
pgd_t *const pgd_buff = (pgd_t*)(CONFIG_TTB_BUFFER + 16*KiB);
pmd_t *pmd = ttb_buff;
int i;
printk(BIOS_DEBUG, "LPAE Translation tables are @ %p\n",
ttb_buff);
ASSERT((read_mmfr0() & 0xf) >= 5);
/*
* Set MAIR
* See B4.1.104 of ARMv7 Architecture Reference Manual
*/
write_mair0(
0x00 << (MAIR_INDX_NC*8) | /* Strongly-ordered,
* Non-Cacheable */
0xaa << (MAIR_INDX_WT*8) | /* Write-Thru,
* Read-Allocate */
0xff << (MAIR_INDX_WB*8) /* Write-Back,
* Read/Write-Allocate */
);
/*
* Set up L1 table
* Once set here, L1 table won't be modified by coreboot.
* See B3.6.1 of ARMv7 Architecture Reference Manual
*/
for (i = 0; i < 4; i++) {
pgd_buff[i] = ((uint32_t)pmd & PAGE_MASK) |
3; /* 0b11: valid table entry */
pmd += BLOCK_SIZE / PAGE_SIZE;
}
/*
* Set TTBR0
*/
write_ttbr0((uintptr_t)pgd_buff);
} else {
printk(BIOS_DEBUG, "Translation table is @ %p\n", ttb_buff);
/*
* Translation table base 0 address is in bits 31:14-N, where N
* is given by bits 2:0 in TTBCR (which we set to 0). All lower
* bits in this register should be zero for coreboot.
*/
write_ttbr0((uintptr_t)ttb_buff);
}
/*
* Disable TTBR1 by setting TTBCR.N to 0b000, which means the TTBR0
* table size is 16KB and has indices VA[31:20].
*
* ref: Arch Ref. Manual for ARMv7-A, B3.5.4,
* Set TTBCR
* See B4.1.153 of ARMv7 Architecture Reference Manual
* See B3.5.4 and B3.6.4 for how TTBR0 or TTBR1 is selected.
*/
write_ttbcr(read_ttbcr() & ~0x3);
/*
* Translation table base 0 address is in bits 31:14-N, where N is given
* by bits 2:0 in TTBCR (which we set to 0). All lower bits in this
* register should be zero for coreboot.
*/
write_ttbr0((uintptr_t)ttb_buff);
write_ttbcr(
CONFIG_ARM_LPAE << 31 | /* EAE. 1:Enable LPAE */
0 << 16 | 0 << 0 /* Use TTBR0 for all addresses */
);
/* disable domain-level checking of permissions */
write_dacr(~0);

View File

@ -111,10 +111,34 @@ static inline void write_dacr(uint32_t val)
asm volatile ("mcr p15, 0, %0, c3, c0, 0" : : "r" (val));
}
/*
 * Read Memory Model Feature Register 0 (MMFR0).
 * Returns the raw 32-bit value obtained from CP15 (c0, c1, opcode2 4).
 */
static inline uint32_t read_mmfr0(void)
{
	uint32_t value;

	asm volatile ("mrc p15, 0, %[out], c0, c1, 4" : [out] "=r" (value));
	return value;
}
/*
 * Read MAIR0 (Memory Attribute Indirection Register 0).
 * Returns the raw 32-bit value obtained from CP15 (c10, c2, opcode2 0).
 */
static inline uint32_t read_mair0(void)
{
	uint32_t value;

	asm volatile ("mrc p15, 0, %[out], c10, c2, 0" : [out] "=r" (value));
	return value;
}
/*
 * Write MAIR0 (Memory Attribute Indirection Register 0).
 * val is stored unmodified through CP15 (c10, c2, opcode2 0).
 */
static inline void write_mair0(uint32_t val)
{
	asm volatile ("mcr p15, 0, %[in], c10, c2, 0" : : [in] "r" (val));
}
/*
 * write translation table base register 0 (TTBR0)
 *
 * With CONFIG_ARM_LPAE the register is written with a two-register transfer
 * (mcrr): val supplies the first word and the second word is always zero.
 * Otherwise val is written with a single mcr.
 *
 * Both variants carry a "memory" clobber so the compiler cannot move stores
 * that fill the translation tables past the point where the table base is
 * handed to the MMU.
 */
static inline void write_ttbr0(uint32_t val)
{
#if CONFIG_ARM_LPAE
	asm volatile ("mcrr p15, 0, %[val], %[zero], c2" : :
			[val] "r" (val), [zero] "r" (0) : "memory");
#else
	asm volatile ("mcr p15, 0, %0, c2, c0, 0" : : "r" (val) : "memory");
#endif
}
/* read translation table base control register (TTBCR) */

View File

@ -1,4 +1,6 @@
config SOC_NVIDIA_TEGRA124
bool
default n
select ARCH_BOOTBLOCK_ARMV4
select ARCH_ROMSTAGE_ARMV7
select ARCH_RAMSTAGE_ARMV7
@ -6,8 +8,8 @@ config SOC_NVIDIA_TEGRA124
select BOOTBLOCK_CONSOLE
select DYNAMIC_CBMEM
select ARM_BOOTBLOCK_CUSTOM
bool
default n
select ARM_LPAE
if SOC_NVIDIA_TEGRA124
@ -31,8 +33,7 @@ config BOOTBLOCK_CPU_INIT
# handoff that area may be reclaimed for other uses, e.g. CBFS cache.)
#
# 0x4000_0000 TTB (16K+32B). 32B is for L1 table of LPAE.
# 0x4000_4020 CBMEM console area (8K-32B)
# 0x4000_6000 CBFS mapping cache (88K)
# 0x4000_4020 CBFS mapping cache (96K-32B)
# 0x4001_C000 Stack (16KB... don't reduce without comparing LZMA scratchpad!).
# 0x4002_0000 Bootblock (max 48KB).
# 0x4002_C000 ROM stage (max 80KB).
@ -86,15 +87,11 @@ config TTB_BUFFER
config CBFS_CACHE_ADDRESS
hex "memory address to put CBFS cache data"
default 0x40006000
default 0x40004020
config CBFS_CACHE_SIZE
hex "size of CBFS cache data"
default 0x00016000
config CBMEM_CONSOLE_PRERAM_BASE
hex "memory address of the CBMEM console buffer"
default 0x40004020
default 0x00017fe0
config TEGRA124_MODEL_TD570D
bool "TD570D"