arch/arm64/armv8/mmu: Add support for 48bit VA

The VA space needs to be extended to support 48bit, as on Cavium SoCs
the MMIO starts at 1 << 47.

The following changes were done to coreboot and libpayload:
 * Use page table lvl 0
 * Increase VA bits to 48
 * Enable 256TB in MMU controller
 * Add additional asserts

Tested on Cavium SoC and two ARM64 Chromebooks.

Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Reviewed-on: https://review.coreboot.org/24970
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Julius Werner <jwerner@chromium.org>
Author:    Patrick Rudolph <patrick.rudolph@9elements.com>, 2018-03-05 09:53:47 +01:00
Committer: David Hendricks
Parent:    3d9462a07f
Commit:    57afc5e0f2

4 changed files with 37 additions and 37 deletions

View File

@@ -172,6 +172,7 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 				uint64_t size,
 				uint64_t tag)
 {
+	uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT;
 	uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT;
 	uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT;
 	uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT;
@@ -179,10 +180,10 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 	uint64_t desc;
 	uint64_t attr = get_block_attr(tag);

-	/* L1 table lookup
-	 * If VA has bits more than L2 can resolve, lookup starts at L1
-	 * Assumption: we don't need L0 table in coreboot */
-	if (BITS_PER_VA > L1_ADDR_SHIFT) {
+	/* L0 entry stores a table descriptor (doesn't support blocks) */
+	table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE);
+
+	/* L1 table lookup */
 	if ((size >= L1_XLAT_SIZE) &&
 	    IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
 		/* If block address is aligned and size is greater than
@@ -193,12 +194,11 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 		/* L2 lookup is not required */
 		return L1_XLAT_SIZE;
 	}
-	table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
-	}
-
-	/* L2 table lookup
-	 * If lookup was performed at L1, L2 table addr is obtained from L1 desc
-	 * else, lookup starts at ttbr address */
+
+	/* L1 entry stores a table descriptor */
+	table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+
+	/* L2 table lookup */
 	if ((size >= L2_XLAT_SIZE) &&
 	    IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) {
 		/* If block address is aligned and size is greater than
@@ -226,6 +226,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
 {
 	assert(!(addr & GRANULE_SIZE_MASK) &&
 	       !(size & GRANULE_SIZE_MASK) &&
+	       (addr + size < (1UL << BITS_PER_VA)) &&
 	       size >= GRANULE_SIZE);
 }
@@ -344,7 +345,7 @@ void mmu_enable(void)
 	/* Initialize TCR flags */
 	raw_write_tcr_current(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
-			      TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
+			      TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB |
 			      TCR_TBI_USED);

 	/* Initialize TTBR */

View File

@@ -83,7 +83,7 @@ extern char _start[], _end[];
 /* XLAT Table Init Attributes */
 #define VA_START                   0x0
-#define BITS_PER_VA                33
+#define BITS_PER_VA                48
 #define MIN_64_BIT_ADDR            (1UL << 32)
 /* Granule size of 4KB is being used */
 #define GRANULE_SIZE_SHIFT         12
@@ -92,14 +92,12 @@ extern char _start[], _end[];
 #define GRANULE_SIZE_MASK          ((1 << GRANULE_SIZE_SHIFT) - 1)
 #define BITS_RESOLVED_PER_LVL      (GRANULE_SIZE_SHIFT - 3)
+#define L0_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3)
 #define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
 #define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
 #define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
-#error "BITS_PER_VA too large (we don't have L0 table support)"
-#endif
+#define L0_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT)
 #define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
 #define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
 #define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
@@ -109,6 +107,7 @@ extern char _start[], _end[];
 #define L3_XLAT_SIZE               (1UL << L3_ADDR_SHIFT)
 #define L2_XLAT_SIZE               (1UL << L2_ADDR_SHIFT)
 #define L1_XLAT_SIZE               (1UL << L1_ADDR_SHIFT)
+#define L0_XLAT_SIZE               (1UL << L0_ADDR_SHIFT)

 /* Block indices required for MAIR */
 #define BLOCK_INDEX_MEM_DEV_NGNRNE 0

View File

@@ -141,6 +141,7 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 				uint64_t size,
 				uint64_t tag)
 {
+	uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT;
 	uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT;
 	uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT;
 	uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT;
@@ -148,10 +149,10 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 	uint64_t desc;
 	uint64_t attr = get_block_attr(tag);

-	/* L1 table lookup
-	 * If VA has bits more than L2 can resolve, lookup starts at L1
-	 * Assumption: we don't need L0 table in coreboot */
-	if (BITS_PER_VA > L1_ADDR_SHIFT) {
+	/* L0 entry stores a table descriptor (doesn't support blocks) */
+	table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE);
+
+	/* L1 table lookup */
 	if ((size >= L1_XLAT_SIZE) &&
 	    IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
 		/* If block address is aligned and size is greater than
@@ -162,12 +163,11 @@ static uint64_t init_xlat_table(uint64_t base_addr,
 		/* L2 lookup is not required */
 		return L1_XLAT_SIZE;
 	}
-	table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
-	}
-
-	/* L2 table lookup
-	 * If lookup was performed at L1, L2 table addr is obtained from L1 desc
-	 * else, lookup starts at ttbr address */
+
+	/* L1 entry stores a table descriptor */
+	table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+
+	/* L2 table lookup */
 	if ((size >= L2_XLAT_SIZE) &&
 	    IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) {
 		/* If block address is aligned and size is greater than
@@ -195,6 +195,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
 {
 	assert(!(addr & GRANULE_SIZE_MASK) &&
 	       !(size & GRANULE_SIZE_MASK) &&
+	       (addr + size < (1UL << BITS_PER_VA)) &&
 	       size >= GRANULE_SIZE);
 }
@@ -202,7 +203,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
  * Desc : Returns the page table entry governing a specific address. */
 static uint64_t get_pte(void *addr)
 {
-	int shift = BITS_PER_VA > L1_ADDR_SHIFT ? L1_ADDR_SHIFT : L2_ADDR_SHIFT;
+	int shift = L0_ADDR_SHIFT;
 	uint64_t *pte = (uint64_t *)_ttb;

 	while (1) {
@@ -257,8 +258,8 @@ void mmu_init(void)
 	for (; _ettb - (u8 *)table > 0; table += GRANULE_SIZE/sizeof(*table))
 		table[0] = UNUSED_DESC;

-	/* Initialize the root table (L1) to be completely unmapped. */
-	uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE);
+	/* Initialize the root table (L0) to be completely unmapped. */
+	uint64_t *root = setup_new_table(INVALID_DESC, L0_XLAT_SIZE);
 	assert((u8 *)root == _ttb);

 	/* Initialize TTBR */
@@ -269,7 +270,7 @@ void mmu_init(void)
 	/* Initialize TCR flags */
 	raw_write_tcr_el3(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
-			  TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
+			  TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB |
 			  TCR_TBI_USED);
 }

View File

@@ -69,7 +69,7 @@
 /* XLAT Table Init Attributes */
 #define VA_START                   0x0
-#define BITS_PER_VA                33
+#define BITS_PER_VA                48
 /* Granule size of 4KB is being used */
 #define GRANULE_SIZE_SHIFT         12
 #define GRANULE_SIZE               (1 << GRANULE_SIZE_SHIFT)
@@ -77,14 +77,12 @@
 #define GRANULE_SIZE_MASK          ((1 << GRANULE_SIZE_SHIFT) - 1)
 #define BITS_RESOLVED_PER_LVL      (GRANULE_SIZE_SHIFT - 3)
+#define L0_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3)
 #define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
 #define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
 #define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
-#error "BITS_PER_VA too large (we don't have L0 table support)"
-#endif
+#define L0_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT)
 #define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
 #define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
 #define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
@@ -94,6 +92,7 @@
 #define L3_XLAT_SIZE               (1UL << L3_ADDR_SHIFT)
 #define L2_XLAT_SIZE               (1UL << L2_ADDR_SHIFT)
 #define L1_XLAT_SIZE               (1UL << L1_ADDR_SHIFT)
+#define L0_XLAT_SIZE               (1UL << L0_ADDR_SHIFT)

 /* Block indices required for MAIR */
 #define BLOCK_INDEX_MEM_DEV_NGNRNE 0